Python 爬取学习通考试练习题目

Nov 28, 2022 2 min read

起因：临近期末没有题库，只有学习通上一套可以考 100 次的考试练习题，所以打算把题目爬取下来直接看，毕竟在学习通上反复考试太浪费时间了。

点击 考试详情，复制网址，将网址中的必要参数 courseId、classId、id 以及 Cookie 填入下面的代码即可。

import re
import requests
from bs4 import BeautifulSoup

# Query parameters for the exam page; fill these in before running.
# The names mirror the query-string keys used in the URL below.
courseId = ''
classId = ''
id = ''  # NOTE: shadows the builtin id(); kept so it matches the query key
# Value sent verbatim as the Cookie request header.
cookie = ''

url = 'http://mooc1.chaoxing.com/exam-ans/exam/test/reVersionPaperMarkContentNew?courseId=' + courseId + '&classId=' + classId + '&id=' + id
headers = {
    'Cookie': cookie,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
}

# Certificate verification is left at the requests default (enabled): the
# request carries a session cookie, so it must not be sent to an unverified
# host if the server redirects to HTTPS. The timeout keeps the script from
# hanging indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as exam content.
response.raise_for_status()
data = response.text

soup = BeautifulSoup(data, 'html.parser')

# Patterns are compiled once, outside the loop.
_WHITESPACE_RE = re.compile(r'\s+')
_OPTION_LABEL_RE = re.compile(r'([A-Z])、')
_ANSWER_RE = re.compile(r'正确答案：(.*?)我的答案', re.S)

# Every question is rendered as a <div class="TiMu" style="position:relative">.
question_list = soup.find_all('div', class_='TiMu', style='position:relative')

entries = []
for item in question_list:
    # Question text; skip blocks that do not contain a question container
    # rather than crashing on None.
    question_tag = item.find('div', class_='fl clearfix')
    if question_tag is None:
        continue
    question = question_tag.text

    # Options: absent when the block has no <ul class="Cy_ulTop">, in which
    # case the option text is left empty.
    option_tag = item.find('ul', class_='Cy_ulTop')
    if option_tag is None:
        option = ''
    else:
        option = _WHITESPACE_RE.sub('', option_tag.text)
        # Put each "A、", "B、", ... option on its own line.
        option = _OPTION_LABEL_RE.sub(r'\n\1、', option)

    # Correct answer: the text between "正确答案：" and "我的答案". Falls back
    # to an empty string when the answer block or the markers are missing.
    answer_tag = item.find('div', class_='Py_answer clearfix')
    answer_text = '' if answer_tag is None else _WHITESPACE_RE.sub('', answer_tag.text)
    answer_match = _ANSWER_RE.search(answer_text)
    answer = answer_match.group(1) if answer_match else ''

    entries.append("题目：" + question + '\n' + option + '\n' + "答案：" + answer + '\n' + '\n')

# Open the output file once (append mode, as before) and only when there is
# something to write, instead of reopening it for every question.
if entries:
    with open('list.txt', 'a', encoding='utf-8') as f:
        f.writelines(entries)