Python 爬取学习通考试练习题目
2 min read
起因:临近期末没有题库,只有学习通上一个可以考 100 次的考试练习,所以打算把题目爬取下来直接看题,毕竟在学习通上反复考试太浪费时间了。
点击“考试详情”,复制网址,将必要参数 courseId、classId、id 和 Cookie 填入下面的代码即可:
import re
import requests
from bs4 import BeautifulSoup
# --- Configuration -----------------------------------------------------------
# Fill these in before running: the three identifiers are sent as the
# courseId / classId / id query parameters of the request below, and `cookie`
# is sent verbatim as the Cookie request header.
courseId = ''
classId = ''
id = ''  # NOTE: shadows the builtin id(); name kept to mirror the `id` query parameter.
cookie = ''

# Endpoint only; the query string is supplied through `params=` so that
# requests takes care of URL-encoding the values.
url = 'http://mooc1.chaoxing.com/exam-ans/exam/test/reVersionPaperMarkContentNew'
headers = {
    'Cookie': cookie,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
}

# --- Fetch and parse ---------------------------------------------------------
# * timeout: without one, requests may block indefinitely on a stalled server.
# * raise_for_status(): fail loudly on 4xx/5xx instead of parsing an error page.
# * TLS verification is left at the requests default (enabled) because the
#   request carries a session cookie; pass verify=False again only if a
#   local debugging proxy makes that unavoidable.
response = requests.get(
    url,
    params={'courseId': courseId, 'classId': classId, 'id': id},
    headers=headers,
    timeout=10,
)
response.raise_for_status()
data = response.text

soup = BeautifulSoup(data, 'html.parser')
# One <div class="TiMu" style="position:relative"> element is collected per question.
question_list = soup.find_all('div', class_='TiMu', style='position:relative')
# Patterns are compiled once instead of being looked up on every iteration.
_WHITESPACE_RE = re.compile(r'\s+')
_OPTION_LABEL_RE = re.compile(r'([A-Z])、')
# Captures the text after "正确答案" up to "我的答案"; if that trailing marker is
# missing, the capture runs to the end of the string instead of failing.
# Both the ASCII and the full-width colon are accepted -- NOTE(review): confirm
# which one the page actually emits.
_ANSWER_RE = re.compile(r'正确答案[::](.*?)(?:我的答案|$)', re.S)


def _find_text(parent, tag, css_class):
    """Return the text of the first matching descendant, or '' if none exists."""
    node = parent.find(tag, class_=css_class)
    return node.text if node is not None else ''


def _format_options(raw):
    """Strip all whitespace, then start a new line before each "A、"-style label."""
    compact = _WHITESPACE_RE.sub('', raw)
    return _OPTION_LABEL_RE.sub(r'\n\1、', compact)


def _extract_answer(raw):
    """Return the correct-answer text from an answer block, or '' if not found."""
    compact = _WHITESPACE_RE.sub('', raw)
    match = _ANSWER_RE.search(compact)
    return match.group(1) if match else ''


def _format_entry(item):
    """Build the text written to the output file for one question element."""
    # Question text is written as extracted (no whitespace normalisation).
    question = _find_text(item, 'div', 'fl clearfix')
    # Questions without an option list simply produce an empty options section.
    option = _format_options(_find_text(item, 'ul', 'Cy_ulTop'))
    answer = _extract_answer(_find_text(item, 'div', 'Py_answer clearfix'))
    return f"题目:{question}\n{option}\n答案:{answer}\n\n"


entries = [_format_entry(item) for item in question_list]
if entries:
    # Append mode: repeated runs accumulate in list.txt rather than overwrite it.
    # The file is opened once for the whole batch instead of once per question.
    with open('list.txt', 'a', encoding='utf-8') as f:
        f.writelines(entries)