爬取内容:推荐知识点中所有的题目
爬取方式:appium模拟操作获取前端数据
入门级简单实现,只提取了题目和答案中的文字内容,非文字内容(如图片)没有提取出来
适用场景:数据不多,参数加密,反爬严格等场景
from appium import webdriver
import time
import xlwt
# 定义夜神模拟器的 IP 地址和端口号
from selenium.common.exceptions import NoSuchElementException
# Desired capabilities sent to the Appium server when the session is created.
desired_caps = {
    # Mobile platform under automation.
    'platformName': 'Android',
    # Platform OS version.
    # NOTE(review): the original note says the integer part is enough, and
    # '7.0.5.8' looks like an emulator build number rather than an Android
    # release - confirm against the target device.
    'platformVersion': '7.0.5.8',
    # Device name; per the original note any value is accepted here.
    'deviceName': '夜神模拟器',
    # Package and entry activity of the app under test. They can be read with:
    #   adb shell dumpsys window | findstr mCurrentFocus
    'appPackage': 'me.hxyfj.rk',
    'appActivity': 'com.uzmap.pkg.EntranceActivity',
    # Do not reset the app state for the automation session.
    'noReset': True,
    'unicodeKeyboard': True,
    'resetKeyboard': True,
    # Session timeout in seconds (the original note gives 60s as the default).
    # The key was previously misspelled as 'newConnabdTineout', which is not
    # a capability Appium recognises, so the value was never applied.
    'newCommandTimeout': 6000,
}
# Connect to the emulator: create the driver object that controls the device
# and launches the app under test.
# URL layout: the host is the machine running the Appium server, the port is
# the one it listens on, and the path is fixed to /wd/hub.
driver = webdriver.Remote('http://localhost:4723/wd/hub', desired_caps)
driver.implicitly_wait(10)
# Query the screen size once and take both dimensions from the same reply
# instead of issuing one remote call per dimension.
window_size = driver.get_window_size()
width = window_size['width']
height = window_size['height']
# Option letters, in the order the answer choices are read from the page.
anwers = ["A", "B", "C", "D"]
# Header row written at the top of every worksheet.
topics = ["序号", "题目", "A", "B", "C", "D", "答案", "解析"]
# Workbook that stores the scraped questions.
workbook = xlwt.Workbook(encoding='utf-8')
# XPath of the question container with a given @index inside the question list.
_QUESTION_XPATH = (
    "//android.view.View[@resource-id='question_list']"
    "/android.view.View[@index='{index}']"
)


def _joined_text(elements):
    """Return the concatenated "text" attribute of *elements*, in order."""
    return "".join(element.get_attribute("text") for element in elements)


def _read_question_text(question_xpath):
    """Return the question text located under *question_xpath*.

    Two layouts are tried: text views nested in the first child view, then
    text views that are direct children of the question container. The second
    lookup is used only when the first yields nothing but whitespace, and its
    result replaces (rather than extends) that whitespace.
    """
    nested = driver.find_elements_by_xpath(
        question_xpath + "/android.view.View[1]/android.widget.TextView")
    text = _joined_text(nested)
    if text.strip():
        return text
    direct = driver.find_elements_by_xpath(
        question_xpath + "/android.widget.TextView")
    return _joined_text(direct)


def get_questions(question_lists, content_name):
    """Write every question of one category into its own worksheet.

    Args:
        question_lists: the question elements of the open category; only the
            number of entries is used, each question is re-located by index.
        content_name: category name, used as the worksheet name.

    Each row holds the running number, the question text, the four option
    texts and the letter of the correct option. The last header column
    ("解析") is not filled in by this function. The workbook is saved to
    'choice_question.xls' once the category has been processed.
    """
    # One sheet per category, starting with the header row.
    booksheet = workbook.add_sheet(content_name)
    for column, title in enumerate(topics):
        booksheet.write(0, column, title)

    for i in range(len(question_lists)):
        row = i + 1
        question_xpath = _QUESTION_XPATH.format(index=i)

        question_text = _read_question_text(question_xpath)
        booksheet.write(row, 0, row)
        booksheet.write(row, 1, question_text)

        # Options: remember the letter whose icon marks it as the right answer.
        correct_option = ""
        for j, letter in enumerate(anwers):
            option_xpath = "{0}/android.view.View[@clickable='true'][{1}]".format(
                question_xpath, j + 1)
            marker = driver.find_element_by_xpath(
                option_xpath + "/android.widget.Image").text
            option_text = _joined_text(driver.find_elements_by_xpath(
                option_xpath + "/android.widget.TextView"))
            if marker == "ic_exam_answer_true":
                correct_option = letter
            # Columns 2-5 hold options A-D.
            booksheet.write(row, j + 2, option_text)
        # Column 6 holds the answer letter (empty when none was marked).
        booksheet.write(row, 6, correct_option)

    # Save after the category is done so the sheet is on disk before moving on.
    workbook.save('choice_question.xls')
# driver.find_element_by_xpath("//android.view.View[@resource-id='content-list']/android.view.View[3]").click()
# time.sleep(2)
# Outer level: walk through the clickable subject categories, scrape each one
# and always close the session at the end.
_CATEGORY_XPATH = (
    "//android.view.View[@resource-id='content-list']"
    "/android.view.View[@clickable='true']"
)
try:
    content_lists = driver.find_elements_by_xpath(_CATEGORY_XPATH)
    for i in range(len(content_lists)):
        time.sleep(1)
        # Categories are re-located by position on every pass because the
        # list page is left and re-entered for each category.
        category_xpath = "{0}[{1}]".format(_CATEGORY_XPATH, i + 1)
        content_name = driver.find_element_by_xpath(
            category_xpath
            + "/preceding-sibling::android.view.View[1]/android.view.View[2]"
        ).text
        driver.find_element_by_xpath(category_xpath).click()
        time.sleep(1)
        driver.find_element_by_xpath(
            "//android.view.View[@resource-id='mode']/android.view.View[2]"
        ).click()
        time.sleep(1)
        question_lists = driver.find_elements_by_xpath(
            "//android.view.View[@resource-id='question_list']/android.view.View")
        get_questions(question_lists, content_name)
        # Key code 4 is the Android BACK key: return to the category list.
        driver.keyevent(4)
        time.sleep(1)
        # Swipe up after the fifth category to reveal the entries further down.
        # NOTE(review): the index 4 is hard-coded - presumably tied to how many
        # categories fit on this emulator's screen; confirm for other devices.
        if i == 4:
            driver.swipe(width * 0.5, height * 0.9, width * 0.5, height * 0.1, 1000)
            time.sleep(1)
finally:
    # Close the session even when scraping fails part-way, so it is not left
    # open on the Appium server.
    driver.quit()
结果: