目录
前言
一、演示
二、流程简述
1.CSDN网站自动登入
2.登入查询接口网站+获取网页数据
3.处理HTML数据
4.完整业务执行程序
三、主程序
四、UI程序
前言
为了方便查看个人资源下载的数据,通过selenium控制浏览器 + HTML网页源代码数据获取 + 数据分析 三个流程来达到目的。
一、演示
二、流程简述
-
进入CSDN网站,登录账号和密码;
-
进入个人查询网站,获取网页数据;
-
分析网页数据,得到需要的数据;
-
将得到的数据,显示在界面;
1.CSDN网站自动登入
通过 selenium + 对应的浏览器驱动,完成登入CSDN网站的操作
代码如下(示例):
# Excerpt of Worker.run: start Edge and submit the CSDN login form.
# Browser launch options
option= webdriver.EdgeOptions()
# Add a launch argument so the browser runs in headless (no window) mode
option.add_argument('--headless')
# option = False
browser=webdriver.Edge(options=option)
# Open the CSDN passport login page
browser.get(r'https://passport.csdn.net/login')
# NOTE(review): presumably this element switches the page to account/password login — confirm against the live page
pson = browser.find_element(by=By.XPATH,value='/html/body/div[2]/div/div[2]/div[2]/div[2]/div/div[1]/span[4]')
pson.click()
# Fixed pause after the click before the input fields are looked up
time.sleep(0.5)
# Type the user name (self.msg[0]) and the password (self.msg[1])
browser.find_element(by=By.XPATH,value='/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[1]/div/input').send_keys(self.msg[0])
browser.find_element(by=By.XPATH,value='/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[2]/div/input').send_keys(self.msg[1])
# NOTE(review): presumably ticks the agreement checkbox — confirm against the live page
browser.find_element(by=By.XPATH,value='/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[2]/div/i').click()
# Click the login button
browser.find_element(by=By.XPATH,value='/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[4]/button').click()
2.登入查询接口网站+获取网页数据
代码如下(示例):
# Excerpt of Worker.run: load the upload-list endpoint in the logged-in browser
# (first page, up to 100 entries per the query string)
browser.get('https://download-console-api.csdn.net/v1/user/sources/getUploadListByUserName?status=2&pageNum=1&pageSize=100')
page_sources = browser.page_source
# Strip the markup the browser wraps around the response and keep only its text
soup = BeautifulSoup(page_sources, 'html.parser')
shuju = soup.get_text()
# shuju = soup
# Debug output: the raw text and its type
print(shuju)
print(type(shuju))
3.处理HTML数据
代码如下(示例):
# Excerpt of Worker.run: pull the status code, titles, URLs and download
# counts out of the response text `shuju` with regular expressions and send
# them to the UI through the `finished` signal.
try:
    # Extract the status code: the text between 'code' and the next comma
    res = r'code(.*?),'
    result = re.findall(res,shuju)
    print(result)
    data = result[0].split(':')[-1] # split on ':' and keep the last piece (the numeric code)
    print(data)
    if data == "200":
        pass
    else:
        self.finished.emit([2,"状态码返回错误!"])
        # NOTE(review): this early return skips browser.quit() and emits no kind-4 message
        return -1
    # Extract the resource titles
    res = r'title(.*?),'
    result_title = re.findall(res,shuju)
    # print(result_title)
    result_titles = []
    for shujus in result_title:
        # Keep what follows the first ':' and drop the double quotes
        result_title = shujus.split(':',1)[-1]
        result_title = str(result_title).replace('"',"")
        result_titles.append(result_title)
    print(result_titles)
    # Extract the resource URLs
    res = r'sourceUrl(.*?),'
    result_ziyuan = re.findall(res,shuju)
    # print(result_ziyuan)
    result_ziyuans = []
    for shujus in result_ziyuan:
        result_ziyuan = shujus.split(':',1)[-1]
        result_ziyuan = str(result_ziyuan).replace('"',"")
        result_ziyuans.append(result_ziyuan)
    print(result_ziyuans)
    # Extract the download counts
    res = r'downloadNum(.*?),'
    result_donum = re.findall(res,shuju)
    # print(result_donum)
    result_donums = []
    for shujus in result_donum:
        result_donum = shujus.split(':',1)[-1]
        result_donum = str(result_donum).replace('"',"")
        result_donums.append(result_donum)
    print(result_donums)
    time.sleep(2)
    # Emit one group of messages per resource; the three lists are indexed in
    # parallel, so a shorter URL/count list raises IndexError (handled below)
    for i in range(0,len(result_titles)):
        shuju1 = "资源名称{}-- ".format(i+1) + result_titles[i]
        shuju2 = result_ziyuans[i]
        shuju3 = "资源下载次数-- " + result_donums[i] +"\n"
        shuju4 = "---------------------------------------------\n"
        self.finished.emit([1,shuju1])
        self.finished.emit([3,shuju2])
        self.finished.emit([1,shuju3])
        self.finished.emit([5,""])
        self.finished.emit([2,shuju4])
        self.finished.emit([5,""])
    # Kind 4 carries the final message of the query
    i = 4
    self.finished.emit([i,"完成查询!"])
    # Close the browser
    browser.quit()
except Exception as error:
    print(error)
    # Report the error text (kind 2), then the final failure message (kind 4)
    self.finisheds(2,"{}".format(error))
    self.finished.emit([4,"查询失败!"])
    # Close the browser
    browser.quit()
4.完整业务执行程序
import time,os,shutil,sys
from PyQt5.QtCore import QThread,pyqtSignal
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import re
class Worker(QThread):
    """Background thread that logs in to CSDN and reports upload statistics.

    The thread drives a headless Edge browser through selenium, loads the
    upload-list endpoint and extracts each resource's title, URL and download
    count. Everything is reported through the ``finished`` signal as a
    ``[kind, text]`` list; the connected slot decides how each ``kind`` is
    displayed. Kind 4 is always the last message of a run, both on success
    and on failure.
    """

    # NOTE(review): QThread already provides an argument-less ``finished``
    # signal; this attribute shadows it. The name is kept because existing
    # callers connect to it.
    finished = pyqtSignal(list)

    def __init__(self, msg=None):
        """Store the login data.

        Args:
            msg: Sequence whose item 0 is the user name and item 1 the
                password typed into the login form.
        """
        super().__init__()
        self.msg = msg
        self.ret = "True"

    def run(self):
        """Log in, download the upload list and emit one message group per resource.

        Any exception, including one raised while preparing the driver or
        logging in, is reported as a kind-2 message followed by the kind-4
        failure message. The browser is closed on every exit path.

        Returns:
            -1 when the response status code is not "200", otherwise None.
        """
        browser = None
        try:
            self.get_python_path()

            # Start Edge in headless (no window) mode.
            option = webdriver.EdgeOptions()
            option.add_argument('--headless')
            browser = webdriver.Edge(options=option)

            self._login(browser)
            time.sleep(2)
            shuju = self._fetch_upload_list(browser)

            # Status code: text between 'code' and the next comma, after the last ':'.
            result = re.findall(r'code(.*?),', shuju)
            print(result)
            data = result[0].split(':')[-1]
            print(data)
            if data != "200":
                self.finished.emit([2, "状态码返回错误!"])
                # Kind 4 tells the receiver the query is over, so it must be
                # sent on this path as well.
                self.finished.emit([4, "查询失败!"])
                return -1

            result_titles = self._extract_field(shuju, 'title')
            print(result_titles)
            result_ziyuans = self._extract_field(shuju, 'sourceUrl')
            print(result_ziyuans)
            result_donums = self._extract_field(shuju, 'downloadNum')
            print(result_donums)

            time.sleep(2)
            separator = "---------------------------------------------\n"
            # The lists are indexed in parallel; a shorter URL/count list
            # raises IndexError, which is reported by the handler below.
            for i, title in enumerate(result_titles):
                self.finished.emit([1, "资源名称{}-- ".format(i + 1) + title])
                self.finished.emit([3, result_ziyuans[i]])
                self.finished.emit([1, "资源下载次数-- " + result_donums[i] + "\n"])
                self.finished.emit([5, ""])
                self.finished.emit([2, separator])
                self.finished.emit([5, ""])
            self.finished.emit([4, "完成查询!"])
        except Exception as error:
            print(error)
            self.finisheds(2, "{}".format(error))
            self.finished.emit([4, "查询失败!"])
        finally:
            # Close the browser on success, failure and early return alike.
            if browser is not None:
                browser.quit()

    def _login(self, browser):
        """Open the CSDN login page and submit the credentials from ``self.msg``."""
        browser.get(r'https://passport.csdn.net/login')
        # NOTE(review): presumably switches the page to account/password login — confirm.
        browser.find_element(by=By.XPATH, value='/html/body/div[2]/div/div[2]/div[2]/div[2]/div/div[1]/span[4]').click()
        time.sleep(0.5)
        # Type the user name and the password.
        browser.find_element(by=By.XPATH, value='/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[1]/div/input').send_keys(self.msg[0])
        browser.find_element(by=By.XPATH, value='/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[2]/div/input').send_keys(self.msg[1])
        # NOTE(review): presumably ticks the agreement checkbox — confirm.
        browser.find_element(by=By.XPATH, value='/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[2]/div/i').click()
        # Click the login button.
        browser.find_element(by=By.XPATH, value='/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[4]/button').click()

    def _fetch_upload_list(self, browser):
        """Load the upload-list endpoint and return the page's text content."""
        browser.get('https://download-console-api.csdn.net/v1/user/sources/getUploadListByUserName?status=2&pageNum=1&pageSize=100')
        page_sources = browser.page_source
        # Strip the markup the browser wraps around the response.
        soup = BeautifulSoup(page_sources, 'html.parser')
        shuju = soup.get_text()
        print(shuju)
        print(type(shuju))
        return shuju

    @staticmethod
    def _extract_field(text, key):
        """Return the cleaned value following every occurrence of ``key`` in ``text``.

        Each value is the text between ``key`` and the next comma, cut after
        the first ':' and stripped of double quotes.
        """
        return [
            chunk.split(':', 1)[-1].replace('"', "")
            for chunk in re.findall(key + r'(.*?),', text)
        ]

    def finisheds(self, i, h=None):
        """Emit ``[i, h]`` through the ``finished`` signal."""
        self.finished.emit([i, h])

    def get_python_path(self):
        """Copy the bundled Edge driver into the interpreter's Scripts folder if it is missing.

        Raises:
            OSError: If the bundled driver cannot be copied (for example when
                the source file does not exist).
        """
        # Built with os.path.join: the original literal "Scripts\msedgedriver.exe"
        # contained the invalid escape sequence "\m".
        driver_target = os.path.join(sys.prefix, "Scripts", "msedgedriver.exe")
        driver_source = "./浏览器驱动/msedgedriver.exe"
        if os.path.isfile(driver_target):
            return
        self.finished.emit([2, "缺少驱动文件...!"])
        shutil.copy(driver_source, driver_target)
        time.sleep(1)
        self.finished.emit([1, "驱动文件自动复制成功!"])
三、主程序
import time
from Ui_down import Ui_MainWindow
import sys
from PyQt5.QtGui import QIcon,QKeySequence # 用于添加图标
from PyQt5.QtWidgets import QMainWindow,QApplication,QLineEdit
from PyQt5.QtCore import QUrl
from thread import Worker
'''
1、进入CSDN网站,登录用户和密码
2、进入个人查询网站,获取数据
3、分析数据和显示数据
'''
class show_window(QMainWindow, Ui_MainWindow):
    """Main window: reads the credentials, starts a Worker and logs its messages."""

    def __init__(self):
        """Build the generated UI, set the icon and title, then wire the widgets."""
        super().__init__()
        self.setupUi(self)
        self.setWindowIcon(QIcon("./IMG/icon/icon.jpg"))
        self.setWindowTitle("测试使用")
        self.initUI()

    def initUI(self):
        """Configure widget behaviour and connect the button signals."""
        # Let links shown in the log open outside the text browser.
        self.textBrowser.setOpenLinks(True)
        self.textBrowser.setOpenExternalLinks(True)
        self.pushButton.clicked.connect(self.start_get_data)
        self.lineEdit.setPlaceholderText("手机号/邮箱/用户名")
        self.lineEdit_2.setPlaceholderText("密码")
        # Mask what is typed.
        # NOTE(review): the user-name field is masked too — confirm this is intended.
        self.lineEdit.setEchoMode(QLineEdit.Password)
        self.lineEdit_2.setEchoMode(QLineEdit.Password)
        self.pushButton.setShortcut(QKeySequence('Enter'))
        self.pushButton_2.clicked.connect(self.clear)

    def clear(self):
        """Empty the log view."""
        self.textBrowser.clear()

    def start_get_data(self):
        """Start a query unless one is already running.

        The button caption doubles as the state flag: "查询" means idle,
        anything else means a query is in progress.
        """
        if self.pushButton.text() != "查询":
            self.receive([2, "正在查询中"])
            return
        # Read the user name and the password.
        name = self.lineEdit.text()
        password = self.lineEdit_2.text()
        if not name or not password:
            self.receive([2, "用户名或者密码未填写。"])
            return
        # Create the worker thread and route its messages to the log.
        self.worker = Worker(msg=[name, password])
        self.worker.finished.connect(self.receive)
        self.worker.start()
        self.pushButton.setText("查询中")
        self.receive([1, "查询中...\n"])

    def receive(self, text=None):
        """Append one ``[kind, message]`` entry to the log.

        Args:
            text: Two-item list. ``kind`` 1 is shown in blue, 2 in red and 4
                in green, each prefixed with a timestamp; 3 is shown as a
                hyperlink to the message; 5 is shown in black without a
                timestamp. Kind 4 also restores the button caption. Missing
                or too-short input is ignored.
        """
        # Local import keeps this class self-contained.
        from html import escape

        if not text or len(text) < 2:
            return
        kind = text[0]
        # The message comes from scraped pages and exception texts; escape it
        # so '<', '&' and '"' cannot break the markup or the href attribute.
        message = escape(str(text[1]))
        colours = {1: "#0000FF", 2: "#FF0000", 4: "#00FF00"}
        if kind in colours:
            self.textBrowser.append(
                "<font color=\"{}\">{}:{}</font> ".format(colours[kind], self.gettime(), message)
            )
        elif kind == 3:
            self.textBrowser.append(
                "<a href=\"{}\">{}:{}</a>".format(message, self.gettime(), message)
            )
        elif kind == 5:
            self.textBrowser.append("<font color=\"#000000\">{}</font> ".format(message))
        if kind == 4:
            # The query is over: make the button usable again.
            self.pushButton.setText("查询")

    def gettime(self):
        """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
if __name__ == "__main__":
    # Create the Qt application, show the main window and run the event loop;
    # the loop's return value becomes the process exit code.
    application = QApplication(sys.argv)
    main_window = show_window()
    main_window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)
四、UI程序
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'd:\pythonitem\获取个人CSDN资源下载次数小工具 - 记录\down.ui'
#
# Created by: PyQt5 UI code generator 5.15.11
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
    """Generated widget layout for the main window: two labelled line edits,
    a query button, a text browser and a clear button in a grid layout."""

    def setupUi(self, MainWindow):
        """Create all widgets on ``MainWindow`` and place them in the grid."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(445, 347)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.gridLayout = QtWidgets.QGridLayout(self.centralwidget)
        self.gridLayout.setObjectName("gridLayout")
        # Query button
        self.pushButton = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton.setObjectName("pushButton")
        self.gridLayout.addWidget(self.pushButton, 1, 2, 1, 1)
        # Captions for the two input fields
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.label_2.setObjectName("label_2")
        self.gridLayout.addWidget(self.label_2, 1, 0, 1, 1)
        # First input field
        self.lineEdit = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit.setObjectName("lineEdit")
        self.gridLayout.addWidget(self.lineEdit, 0, 1, 1, 1)
        # Second input field; created with the Normal echo mode
        self.lineEdit_2 = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit_2.setEchoMode(QtWidgets.QLineEdit.Normal)
        self.lineEdit_2.setObjectName("lineEdit_2")
        self.gridLayout.addWidget(self.lineEdit_2, 1, 1, 1, 1)
        # Output area spanning three grid columns
        self.textBrowser = QtWidgets.QTextBrowser(self.centralwidget)
        self.textBrowser.setObjectName("textBrowser")
        self.gridLayout.addWidget(self.textBrowser, 2, 0, 1, 3)
        # Clear button
        self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_2.setObjectName("pushButton_2")
        self.gridLayout.addWidget(self.pushButton_2, 3, 2, 1, 1)
        MainWindow.setCentralWidget(self.centralwidget)
        # Menu bar and status bar
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 445, 23))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        # Apply the display texts, then connect slots by object name
        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title and the texts of the buttons and labels."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.pushButton.setText(_translate("MainWindow", "查询"))
        self.label.setText(_translate("MainWindow", "<html><head/><body><p align=\"center\">手机号:</p></body></html>"))
        self.label_2.setText(_translate("MainWindow", "<html><head/><body><p align=\"center\">密码:</p></body></html>"))
        self.pushButton_2.setText(_translate("MainWindow", "清除"))