使用selenium登录雨课堂并获取课程信息

2020-12-05

tec

python http

登录 ⌛️

操作逻辑：运行程序 - 微信扫码登录 - 获取登录cookie，下次运行程序时即可跳过扫码直接登录

class Yuketang:
    def __init__(self):
        """Initialise the portal URL, an empty data store and the progress bar."""
        # Home page of the school's Yuketang portal.
        self.login_url = "https://sziit.yuketang.cn/pro/portal/home/"
        # Starts out empty; nothing in the visible code fills it.
        self.dataDic = {}
        # Progress bar layout: percentage, '#'-filled bar, elapsed timer, ETA.
        bar_widgets = [Percentage(), Bar('#'), ' ', Timer(), ' ', ETA(), ' ']
        self.bar = ProgressBar(widgets=bar_widgets)
    def Explicit_Waits(self,driver, way, path):#显式等待
        try:
            ele = WebDriverWait(driver, 150).until(
                EC.presence_of_element_located((way, path)))
            return ele
        except Exception as e:
            print('元素寻找失败： ' + str(e))
    # 爬取数据
    def login(self):
        """Open the portal in Chrome and log in, reusing saved cookies if possible.

        Flow:
          1. Start a headless Chrome session and open ``self.login_url``.
          2. Try to read cookies from ``./bd_login_cookies.txt``. If that works,
             add them to the session and refresh the page.
          3. Otherwise fall back to QR-code login: click the login button, give
             the user 10 seconds to scan, save the fresh cookies to the same
             file, then open the course and click every lesson title.

        Side effects:
            Sets ``self.driver`` and ``self.cookies``, may (re)write the cookie
            file, and prints progress plus the cookies to stdout.
        """
        # Function-scope import: the file's import block is not visible here.
        import ast

        cookie_file = './bd_login_cookies.txt'
        # Per the original comments, this one XPath is used both for the login
        # button and, after login, for the "my learning space" button.
        portal_button = '//*[@id="app"]/div[2]/div[2]/div[3]/div/div[1]/div/div/div[2]/button'
        course_card = '//*[@id="pane-student"]/div/div[1]/div/div'

        # Create the browser object.
        # NOTE(review): in headless mode no window is shown, so the QR code of
        # the fallback login cannot be scanned from the screen -- confirm
        # whether '--headless' should be dropped for the first run.
        options = CO()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        self.driver = selenium.webdriver.Chrome(options=options)
        # The page must be loaded before cookies can be attached to its domain
        # and before any of the XPath lookups below can match.
        self.driver.get(self.login_url)

        print("使用本地保存的cookies...")
        saved_cookies = None
        try:
            with open(cookie_file, 'r') as f:
                # literal_eval parses the str(list-of-dicts) dump without
                # executing arbitrary code the way eval() would.
                loaded = ast.literal_eval(f.read())
            if not isinstance(loaded, list):
                raise ValueError('cookie file does not contain a list')
            saved_cookies = loaded
        except (OSError, ValueError, SyntaxError):
            # Missing, unreadable or corrupt cookie file: use the QR login.
            saved_cookies = None

        if saved_cookies is not None:
            self.cookies = saved_cookies
            print(self.cookies)
            for cookie in self.cookies:
                # Drop 'expiry' before handing the cookie back to the driver.
                cookie.pop('expiry', None)
                try:
                    self.driver.add_cookie(cookie)
                except Exception as err:
                    # Best effort: skip a cookie the driver rejects, but say so.
                    print('cookie添加失败： ' + str(err))
            self.driver.refresh()
        else:
            print("正在扫码登录...")
            self.Explicit_Waits(self.driver, By.XPATH, portal_button)
            self.driver.find_element(By.XPATH, portal_button).click()  # login button
            self.Explicit_Waits(self.driver, By.XPATH, portal_button)

            # The user has 10 seconds to scan the QR code; if the scan has not
            # succeeded by then, the steps below fail and the program stops.
            time.sleep(10)
            # Cookies obtained after login; later requests rely on them.
            self.cookies = self.driver.get_cookies()
            # Persist the cookies so the next run can skip the QR scan.
            with open(cookie_file, 'w') as f:
                f.write(str(self.cookies))

            self.driver.find_element(By.XPATH, portal_button).click()  # "my learning space"
            self.Explicit_Waits(self.driver, By.XPATH, course_card)
            self.driver.find_element(By.XPATH, course_card).click()  # the "形势与政策" course
            chapter_list = self.driver.find_elements(By.CLASS_NAME, 'chapter-list')
            for chapter in chapter_list:  # one entry per topic
                for content in chapter.find_elements(By.CLASS_NAME, 'content'):
                    print(content)
                    # Two classes need a CSS selector (a class-name locator takes
                    # a single class), and the result is a list, so click each
                    # match individually.
                    # NOTE(review): if a click navigates away, later elements
                    # may go stale -- confirm against the live page.
                    titles = content.find_elements(
                        By.CSS_SELECTOR, '.leaf-title.text-ellipsis')
                    for title in titles:
                        title.click()

        print(len(self.cookies), self.cookies)

获取课程信息 🌟

上一步登录之后，最大的难题就解决了：获取到登录cookie后，用requests发请求时带上相应的请求头，就可以访问具体的链接，拿到想要的数据了，后面就自由发挥吧。本来想实现一个刷视频的功能，万变不离其宗，其根本原理也是有针对性地发送各种各样的POST请求。但到后面发现请求的参数实在太繁杂了，环环相扣，解决了一个又来一个，其中有一个参数始终找不出是从哪儿来的，只好就此作罢，乖乖手动刷视频了，以后有机会再弄吧。

    def get_message(self):

        headers = {
            'authority': 'sziit.yuketang.cn',
            'method': 'GET',
            'path': '/mooc-api/v1/lms/user/user-courses/?status=1&page=1&no_page=1&term=latest&uv_id={}'.format(self.cookies[4]['value']),
            'scheme': 'https',
            'accept': 'application/json, text/plain, */*',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cookie': 'university_id={}; platform_id=3; user_role=3; csrftoken={}; sessionid={}; platform_id=3; university_id={}; user_role=3; sessionid={}'.format(
                self.cookies[4]['value'],self.cookies[1]['value'],self.cookies[2]['value'],self.cookies[4]['value'],self.cookies[2]['value'],
            ),
            'referer': 'https://sziit.yuketang.cn/pro/courselist',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'university-id': self.cookies[4]['value'],
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
            'x-csrftoken': self.cookies[1]['value'],
            'xtbz': 'cloud'}

        # 必要参数的获取
        uv_id = self.cookies[-1]['value']  # university_id
        product_list = requests.get(
            url='https://sziit.yuketang.cn/mooc-api/v1/lms/user/user-courses/?status=1&page=1&no_page=1&term=latest&uv_id={}'.format(uv_id),
            headers=headers)  # 包含所有课程的详细参数信息
        product_list.encoding="utf-8"
        product_list = product_list.json()['data']['product_list']