1. 分析地址
打开小破站热榜首页,查看响应找到如下接口地址
2. 编码
定义请求头
拿到标头
复制粘贴,处理成json
处理请求头代码如下:
def format_headers_to_json():
f = open("data.txt", "r", encoding="utf-8") # 读入请求头的文件
f1 = open("json.txt", "w", encoding="utf-8") # 打印json格式的文件
f1.write("headers={\n") # 添加一个最前面的左括号
line = "1"
while line != "":
line = f.readline()
if (line == ""):
break
x = line
line = f.readline()
y = line
x = x.strip(':\n') # 去除前后的:和换行
y = y.strip(':\n') # 去除前后的:和换行
i = 0
le = len(y)
while (i < le):
if (y[i] == "\""):
y = y[:i] + "\\" + y[i:] # 在“处添加一个转移符号
i = i + 1 # 跳过这个”
le = le + 1 # 同时将总长度增加
i = i + 1
f1.write("\"" + x + "\"" + ":\"" + y + "\",\n") # 字符串拼接成json格式
f1.write("}")
定义好请求头
headers = {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cookie": "你的cookie",
"Sec-Ch-Ua": "\"Chromium\";v=\"116\", \"Not)A;Brand\";v=\"24\", \"Google Chrome\";v=\"116\"",
"Sec-Ch-Ua-Mobile": "?1",
"Sec-Ch-Ua-Platform": "\"Android\"",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-site",
"User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Mobile Safari/537.36"
}
定义url
https://api.小破站.com/x/web-interface/popular?ps=20&pn=1
获取请求响应转json
r = requests.get(url, headers=headers)
json_data = r.json()
order_list = [] # 视频排名
title_list = [] # 视频标题
like_list = [] # 点赞数
view_list = [] # 播放数
video_list = [] # 播放链接
owner_list = [] # 作者
duration_list = [] # 视频长度
pub_location_list = [] # 发布地
order = 1 # 热榜排名初始值
data_list = json_data['data']['list']
循环
for data in data_list:
title = data.get('title', '') # 视频标题
title_list.append(title)
duration = data.get('duration', '') # 视频长度
duration_list.append(duration)
order_list.append(order)
view_list.append(data['stat']['view']) # 播放
like_list.append(data['stat']['like']) # 点赞
owner_list.append(data['owner']['name']) # 作者
video_list.append(data['short_link_v2']) # 播放链接
pub_location_list.append(data.get('pub_location','')) # 发布地
order += 1
写入csv
df = pd.DataFrame(
{
'视频排名': order_list,
'视频标题': title_list,
'点赞数': like_list,
'播放数': view_list,
'播放链接': video_list,
'时长(单位:秒)': duration_list,
'作者': owner_list,
'视频发布地': pub_location_list,
})
df.to_csv('破站热榜.csv', index=False, encoding='utf_8_sig')