小鹅快速刷题:使用 selenium 通过 XPath 定位题干,再用 fuzzywuzzy 模糊匹配在题库中查找对应题目的答案并自动点击,答题完成后更新题库。
- 先导入基本包,准备好题库
from fuzzywuzzy import process
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
#%%
# Load the question bank from a header-less CSV. Column 0 is used as the
# question list (Q) and column 1 as the parallel answer list (A).
data = pd.read_csv('data.csv', header=None)
Q, A = (data[column].tolist() for column in (0, 1))
#%%
# Launch the browser and open the quiz page.
# To drive Chrome instead of Edge, use: driver = webdriver.Chrome()
driver = webdriver.Edge()
# NOTE(review): the URL literal below looks like a placeholder ("quiz link");
# presumably it must be replaced with the real quiz URL before running.
driver.get('答题链接')
- 获取答题的题目和选项路径
def _click_options_containing(options_xpath, answer):
    """Click each option whose text contains any character of ``answer``.

    Every matching option is clicked exactly once: clicking the same option
    once per matching character could toggle a checkbox back off.

    Returns:
        A ``(clicked, texts)`` tuple: whether at least one option was clicked,
        and the list of all option texts found under ``options_xpath``.
    """
    clicked = False
    texts = []
    for option in driver.find_elements(By.XPATH, options_xpath):
        text = option.text  # read once; each access is a WebDriver call
        texts.append(text)
        if any(char in text for char in answer):
            print('click' + text)
            option.click()
            clicked = True
    return clicked, texts


def ans(q_i):
    """Answer question number ``q_i`` on the currently open quiz page.

    The question text and its type label are read through absolute XPaths.
    The answer is taken from the question bank (``Q``/``A``): an exact match
    of the question text is preferred, otherwise the best fuzzy match from
    ``process.extractOne`` is used. The matching option(s) are then clicked.

    Args:
        q_i: 1-based position of the question container on the page.
            Positions 51..70 use a different DOM layout than the rest
            (presumably the true/false questions -- confirm against the page).

    Raises:
        Exception: with message ``'no click'`` when no option was clicked.
    """
    item = '/html/body/div[1]/div[2]/div/div/div/div/section/section/div[{}]'.format(q_i)
    if q_i < 51 or q_i > 70:
        _q = driver.find_element(By.XPATH, item + '/section/div[1]/div[1]/div/div/div/div/div/p').text
        _q_class = driver.find_element(By.XPATH, item + '/section/div[1]/div[2]').text
    else:
        _q = driver.find_element(By.XPATH, item + '/div/div[1]/div[1]/div/div/div/div/div').text
        _q_class = driver.find_element(By.XPATH, item + '/div/div[1]/div[2]').text

    # Exact hit in the bank if available, otherwise the closest fuzzy match.
    matched_question = _q if _q in Q else process.extractOne(_q, Q)[0]
    _a = A[Q.index(matched_question)]
    print(q_i, _a, _q, _q_class)
    print(process.extract(_q, Q))  # show the fuzzy candidates for manual checking

    _click = False
    _q_a = []  # option texts seen so far, printed for debugging

    if '单选题' in _q_class:
        for option in driver.find_elements(By.XPATH, item + '/section/div[2]/div/label'):
            text = option.text
            _q_a.append(text)
            # Match on the option's first character (its label) or on the
            # whole answer appearing in the option text. An empty option text
            # is skipped instead of raising IndexError on text[0].
            if (text and text[0] in _a) or _a in text:
                print('click' + text)
                _click = True
                option.click()
        print(_q_a)

    if '多选题' in _q_class:
        clicked, texts = _click_options_containing(item + '/section/div[2]/label', _a)
        _click = _click or clicked
        _q_a.extend(texts)
        print(_q_a)

    if '判断题' in _q_class:
        choices = driver.find_elements(By.XPATH, item + '/div/div[2]/div')
        # The first choice is clicked for a "true" answer, the second otherwise.
        if '正确' in _a or '对' in _a or 'T' in _a:
            choices[0].click()
            _click = True
            print("T")
        else:
            choices[1].click()
            _click = True
            print("F")

    if '选择题' in _q_class:
        clicked, texts = _click_options_containing(item + '/section/div[2]/label', _a)
        _click = _click or clicked
        _q_a.extend(texts)
        print(_q_a)

    if not _click:
        print('no click', q_i)
        print('=====================================================')
        raise Exception('no click')
import time, random
# Answer questions 1..72 in order. A failure on one question is printed and
# skipped so the remaining questions are still attempted; after a successful
# answer, pause for a random 1-5 seconds before the next one.
for question_no in range(1, 73):
    try:
        ans(question_no)
    except Exception as err:
        print(err)
    else:
        time.sleep(random.randint(1, 5))
- 每次答题完成后,记录页面上显示的题目和正确答案,用于更新题库
def record(q_i):
    """Read question ``q_i`` and the answer shown for it on the open page.

    The question text, its type label and the displayed answer are located
    through absolute XPaths. The answer currently stored in the question bank
    (exact match in ``Q``, otherwise best fuzzy match) is printed alongside
    for comparison only; it does not affect the return value.

    Args:
        q_i: 1-based position of the question container on the page.
            Positions 51..70 use a different DOM layout than the rest.

    Returns:
        A ``(question_text, answer_text)`` tuple read from the page.

    Raises:
        ValueError: if the type label contains none of the known question
            types, so the answer element cannot be located.
    """
    item = '/html/body/div[1]/div[2]/div/div/div/div[1]/section/div/section[2]/div[{}]'.format(q_i)
    if q_i < 51 or q_i > 70:
        _q = driver.find_element(By.XPATH, item + '/section/div[1]/div[1]/div/div/div/div/div/p').text
        _q_class = driver.find_element(By.XPATH, item + '/section/div[1]/div[2]').text
    else:
        _q = driver.find_element(By.XPATH, item + '/div/div[1]/div[1]/div/div/div/div/div/p').text
        _q_class = driver.find_element(By.XPATH, item + '/div/div[1]/div[2]').text

    # Answer currently stored in the bank, printed below for comparison.
    matched_question = _q if _q in Q else process.extractOne(_q, Q)[0]
    _a = A[Q.index(matched_question)]

    section_answer = item + '/section/div[3]/div/div[2]/div/div[1]/span'
    judge_answer = item + '/div/div[3]/div/div[2]/div/div[1]/span'
    # Checked in this order so that, if several type names appear in the
    # label, the same one wins as in the original sequence of plain ifs.
    if '选择题' in _q_class:
        answer_xpath = section_answer
    elif '判断题' in _q_class:
        answer_xpath = judge_answer
    elif '单选题' in _q_class or '多选题' in _q_class:
        answer_xpath = section_answer
    else:
        raise ValueError(
            'unknown question type for question {}: {!r}'.format(q_i, _q_class)
        )

    answer_text = driver.find_element(By.XPATH, answer_xpath).text  # read once
    print(q_i, _q, answer_text)
    print(_a)
    return _q, answer_text
# Collect the (question, answer) pair shown on the page for questions 1..72
# and dump them to a header-less, index-less CSV.
q, a = [], []
for question_no in range(1, 73):
    question_text, answer_text = record(question_no)
    q.append(question_text)
    a.append(answer_text)

recorded = pd.DataFrame({'q': q, 'a': a})
recorded.to_csv('data_record.csv', index=False, header=False)