Python 爬取学习通考试练习题目

2 min read

起因:临近期末没有题库,只有学习通上一份可以作答 100 次的考试练习题,所以打算把题目爬取下来直接看,毕竟在学习通上反复考试太浪费时间了。

1

点击「考试详情」并复制网址,将必要参数 courseId、classId、id 以及 Cookie 填入代码即可

2

import re
import requests
from bs4 import BeautifulSoup

# Fill these in from the exam-detail page URL and a logged-in browser session.
courseId = ''
classId = ''
id = ''  # NOTE: shadows the builtin id(); kept because it mirrors the URL parameter
cookie = ''

# Endpoint only; the query string is supplied via ``params`` so values get URL-encoded.
url = 'http://mooc1.chaoxing.com/exam-ans/exam/test/reVersionPaperMarkContentNew'
headers = {
    'Cookie': cookie,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
}

OUTPUT_FILE = 'list.txt'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever

# Compiled once instead of on every question.
_WHITESPACE_RE = re.compile(r'\s+')
_OPTION_LETTER_RE = re.compile(r'([A-Z])')
_ANSWER_RE = re.compile(r'正确答案:(.*?)我的答案', re.S)


def format_options(raw):
    """Return the option text with one option per line.

    All whitespace is removed, then a newline is inserted before every
    uppercase ASCII letter (the option labels A, B, C, ...).

    NOTE(review): capitals that occur inside an option's own text are split
    onto new lines as well; this matches the original behaviour.
    """
    compact = _WHITESPACE_RE.sub('', raw)
    return _OPTION_LETTER_RE.sub(r'\n\1', compact)


def extract_answer(raw):
    """Return the text between '正确答案:' and '我的答案' in *raw*.

    Whitespace is stripped before matching. Returns an empty string when
    the markers are not present, instead of raising ``IndexError``.
    """
    compact = _WHITESPACE_RE.sub('', raw)
    found = _ANSWER_RE.search(compact)
    return found.group(1) if found else ''


def main():
    """Download the exam review page and append every question to OUTPUT_FILE.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(
        url,
        params={'courseId': courseId, 'classId': classId, 'id': id},
        headers=headers,
        # NOTE(review): verify=False only matters if the request ends up on
        # HTTPS (e.g. via redirect), where it disables certificate checks.
        # Kept from the original; remove it unless it is really required.
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly rather than parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    question_list = soup.find_all('div', class_='TiMu', style='position:relative')

    # Open the output once for the whole run rather than once per question.
    with open(OUTPUT_FILE, 'a', encoding='utf-8') as f:
        for item in question_list:
            # Question text; skip blocks that do not contain one.
            question_tag = item.find('div', class_='fl clearfix')
            if question_tag is None:
                continue
            question = question_tag.text

            # Options are absent for non-choice questions.
            option_tag = item.find('ul', class_='Cy_ulTop')
            option = format_options(option_tag.text) if option_tag is not None else ''

            # Correct answer; left empty when the block has no answer section.
            answer_tag = item.find('div', class_='Py_answer clearfix')
            answer = extract_answer(answer_tag.text) if answer_tag is not None else ''

            f.write("题目:" + question + '\n' + option + '\n' + "答案:" + answer + '\n' + '\n')


if __name__ == '__main__':
    main()

3