使用selenium登录雨课堂并获取课程信息
登录 ⌛️
操作逻辑:运行程序 → 微信扫码登录 → 获取并保存登录 cookie;下次运行程序时即可读取已保存的 cookie,跳过扫码直接登录。
class Yuketang:
    """Drive a Chrome browser through the Yuketang login and a course page.

    ``login`` first tries the cookies cached in ``COOKIE_FILE``; if that
    fails for any reason it falls back to the WeChat QR-code flow and caches
    the cookies it obtains.  After ``login`` the cookies are available as
    ``self.cookies`` (the list of cookie dicts returned by Selenium).
    """

    # File in which the login cookies are cached between runs.
    COOKIE_FILE = './bd_login_cookies.txt'
    # Button on the portal home page.  The original notes label it both as the
    # login button and as the "my learning space" entry.
    PORTAL_BUTTON_XPATH = '//*[@id="app"]/div[2]/div[2]/div[3]/div/div[1]/div/div/div[2]/button'
    # First course card in the student pane (the original note names the
    # course "Situation and Policy").
    COURSE_CARD_XPATH = '//*[@id="pane-student"]/div/div[1]/div/div'

    def __init__(self):
        """Set the portal URL, an empty data dict and a console progress bar."""
        self.login_url = "https://sziit.yuketang.cn/pro/portal/home/"
        self.dataDic = dict()
        self.bar = ProgressBar(
            widgets=[Percentage(), Bar('#'), ' ', Timer(), ' ', ETA(), ' '])

    def Explicit_Waits(self, driver, way, path, timeout=150):
        """Wait until an element is present in the DOM and return it.

        Args:
            driver: The WebDriver to poll.
            way: Locator strategy (a ``By`` constant).
            path: Locator value matching ``way``.
            timeout: Maximum number of seconds to wait (default 150).

        Returns:
            The located element, or ``None`` if the wait failed; the failure
            is reported on stdout instead of being raised.
        """
        try:
            return WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((way, path)))
        except Exception as e:
            print('元素寻找失败: ' + str(e))
            return None

    @staticmethod
    def _load_cookies(path):
        """Read the cookie list previously written with ``str(cookies)``.

        ``ast.literal_eval`` only accepts Python literals, so a tampered
        cookie file cannot execute code the way ``eval`` would.

        Raises:
            OSError: If the file cannot be read.
            ValueError, SyntaxError: If the content is not a Python literal.
        """
        import ast  # local import: keeps this block self-contained

        with open(path, 'r') as f:
            return ast.literal_eval(f.read())

    def login(self):
        """Log in (cached cookies first, QR code otherwise) and open the course.

        Side effects: creates ``self.driver``, sets ``self.cookies``, may
        rewrite ``COOKIE_FILE``, and prints progress information.
        """
        # Create the browser object.
        # NOTE(review): a headless browser shows no window, so the QR code of
        # the fallback flow cannot be scanned on a first run -- confirm
        # whether '--headless' should be dropped when no cookie file exists.
        options = CO()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        self.driver = selenium.webdriver.Chrome(options=options)
        # The portal has to be loaded before anything else: Selenium only
        # accepts cookies for the domain of the current page, and every
        # locator below refers to elements of this page.
        self.driver.get(self.login_url)

        try:
            print("使用本地保存的cookies...")
            self.cookies = self._load_cookies(self.COOKIE_FILE)
            print(self.cookies)
            for cookie in self.cookies:
                # Drop the expiry field before handing the cookie to Selenium.
                cookie.pop('expiry', None)
                try:
                    self.driver.add_cookie(cookie)
                except Exception:
                    # Deliberately best-effort: one rejected cookie must not
                    # abort the cookie login.
                    continue
            self.driver.refresh()
        except Exception:
            # Any failure above (typically a missing cookie file on the first
            # run) falls back to the interactive QR-code login.
            print("正在扫码登录...")
            self.Explicit_Waits(self.driver, By.XPATH, self.PORTAL_BUTTON_XPATH)
            self.driver.find_element(By.XPATH, self.PORTAL_BUTTON_XPATH).click()  # click the login button
            self.Explicit_Waits(self.driver, By.XPATH, self.PORTAL_BUTTON_XPATH)
            # Fixed 10-second window for scanning the QR code; presumably the
            # lookups below fail if the scan has not completed by then.
            time.sleep(10)
            # Cookies of the logged-in session; get_message() reads them later.
            self.cookies = self.driver.get_cookies()
            with open(self.COOKIE_FILE, 'w') as f:
                f.write(str(self.cookies))  # cache the cookies locally

        # Wait for the button first: after the cookie-login refresh the page
        # may still be loading.
        self.Explicit_Waits(self.driver, By.XPATH, self.PORTAL_BUTTON_XPATH)
        self.driver.find_element(By.XPATH, self.PORTAL_BUTTON_XPATH).click()  # open "my learning space"
        self.Explicit_Waits(self.driver, By.XPATH, self.COURSE_CARD_XPATH)
        self.driver.find_element(By.XPATH, self.COURSE_CARD_XPATH).click()  # open the course

        # Give the course page a chance to render its chapter list.
        self.Explicit_Waits(self.driver, By.CLASS_NAME, 'chapter-list')
        chapter_list = self.driver.find_elements(By.CLASS_NAME, 'chapter-list')
        for chapter in chapter_list:  # walk every chapter's content entries
            for content in chapter.find_elements(By.CLASS_NAME, 'content'):
                print(content)
                # find_elements returns a list, so each title is clicked
                # individually; the two class names need a CSS selector
                # because a class-name locator takes a single class only.
                # NOTE(review): if a click navigates away, the remaining
                # elements may go stale -- confirm the intended interaction.
                for title in content.find_elements(
                        By.CSS_SELECTOR, '.leaf-title.text-ellipsis'):
                    title.click()
        print(len(self.cookies), self.cookies)
获取课程信息 🌟
上一步登录完成之后,最大的难题就解决了:拿到登录 cookie 后,用 requests 发请求时带上相应的请求头,就可以访问具体的链接、拿到想要的数据,后面就可以自由发挥了。本来想实现一个刷视频的功能——万变不离其宗,其根本原理也是有针对性地发送各种各样的 POST 请求;但到后面发现请求的参数实在太繁杂,环环相扣,解决了一个又来一个,其中有一个参数始终找不出是从哪儿来的,只好就此作罢,乖乖手动刷视频,以后有机会再弄吧。
def get_message(self):
    """Request the logged-in user's course list from the Yuketang API.

    Reads ``self.cookies`` (the cookie dicts collected during login), builds
    the request headers from the ``university_id``, ``csrftoken`` and
    ``sessionid`` cookies and fetches the user-courses endpoint.  The parsed
    list ends up in the local ``product_list``.

    Cookies are looked up by name rather than by list position: the order of
    the browser's cookie list is not guaranteed, and the positional version
    read ``university_id`` from two different indices (4 and -1).

    Raises:
        KeyError: If a required cookie is missing, or if the response JSON
            lacks ``data`` / ``product_list``.

    NOTE(review): the visible code stops after parsing the response; if
    nothing follows, consider returning ``product_list``.
    """
    cookie_values = {c.get('name'): c.get('value') for c in self.cookies}
    for required in ('university_id', 'csrftoken', 'sessionid'):
        if cookie_values.get(required) is None:
            raise KeyError('missing login cookie: ' + required)

    uv_id = cookie_values['university_id']  # university_id
    csrf_token = cookie_values['csrftoken']
    session_id = cookie_values['sessionid']

    # Path and query string of the course-list endpoint; used both in the
    # 'path' header and in the request URL.
    api_path = '/mooc-api/v1/lms/user/user-courses/?status=1&page=1&no_page=1&term=latest&uv_id={}'.format(uv_id)

    headers = {
        'authority': 'sziit.yuketang.cn',
        'method': 'GET',
        'path': api_path,
        'scheme': 'https',
        'accept': 'application/json, text/plain, */*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cookie': 'university_id={}; platform_id=3; user_role=3; csrftoken={}; sessionid={}; platform_id=3; university_id={}; user_role=3; sessionid={}'.format(
            uv_id, csrf_token, session_id, uv_id, session_id,
        ),
        'referer': 'https://sziit.yuketang.cn/pro/courselist',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'university-id': uv_id,
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
        'x-csrftoken': csrf_token,
        'xtbz': 'cloud'}

    # The response carries the detailed parameters of every course.
    response = requests.get(
        url='https://sziit.yuketang.cn' + api_path,
        headers=headers)
    response.encoding = "utf-8"
    product_list = response.json()['data']['product_list']