爬虫代码
爬虫代码调用的是我找到的数据接口,过一段时间可能会失效;欢迎大家留言评论,我会不定期更新。
import requests
import time
# Cookies sent with the ranking request; the crawl loop passes this dict as
# the `cookies=` argument of requests.post.
# NOTE(review): these look like session/login values copied from a browser
# session on xd.newrank.cn — presumably they expire and must be refreshed
# before running; confirm. Avoid publishing live credentials.
cookies = {
'token': '5549EB98B15E411DA0BD05935C0F225F',
'tfstk': 'g1vopsc0sQ5SwD8TyEWSTmONZ3cA2u6CReedJ9QEgZ7byzeJYB2HbHn59UKF-Bb2-LpRegdhYZ8l9BBJKIwHfH9-V9n5F36CLV3tBwxWV9smPc5ZXrPVVnSUTjCrIVSuVV3tBxhz090oWUUCxtlcAZSPY_5EmiSGx9SzU9ocuMj_Tz8FLmmcYMzz8_SzuxS5v97e89oD0ZsECRNPWpRw343tphbF6WWfq_bw4a-JjKrhi7tAoJyenIfh7zb0LJJc4nKyrJwi9NR1y1Q9uxelKnSHyZLZQ-XVsCtdxUDuUTd2GL6JHVq1ZebR_KCm_oYGEefwUsqUp3Xhltj2QDF1kKbJ8LfqXRfd3dCNUIhjy6BljeJWrk2e7nK9Fs9nSr7BwG6VX3MunO-PYg5_g5RPkJsqvKPQO_SfmNF7C_i_gRW0kmm06H1PcG_tmmVQO_SfmNnmm5d5ais1W',
'acw_tc': '0a472f9217345091456398947e0084937b6ae99590d77140bfd1bf4a248a00',
'Hm_lvt_a19fd7224d30e3c8a6558dcb38c4beed': '1732521967,1733381547,1734087148,1734509413',
'Hm_lpvt_a19fd7224d30e3c8a6558dcb38c4beed': '1734509413',
'HMACCOUNT': '21B2E9F3C431CAF6',
'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%22nr_7ltg9ho59%22%2C%22first_id%22%3A%2218e5b14d40423b7-08d1278a91f1d-26001b51-3686400-18e5b14d4052309%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2F%22%2C%22%24latest_utm_source%22%3A%22baidu%22%2C%22%24latest_utm_medium%22%3A%22cpc%22%2C%22%24latest_utm_campaign%22%3A%22%E6%96%B0%E6%8A%96SEM%22%2C%22%24latest_utm_term%22%3A%22%E6%96%B0%E6%8A%96%22%7D%2C%22%24device_id%22%3A%2218e5b14d40423b7-08d1278a91f1d-26001b51-3686400-18e5b14d4052309%22%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTkxZjA4MWE4OGI3ZWYtMDkyZTk1MDhlYjZiZjMtMjYwMDExNTEtMzY4NjQwMC0xOTFmMDgxYTg4YzI1MjkiLCIkaWRlbnRpdHlfbG9naW5faWQiOiJucl83bHRnOWhvNTkifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22nr_7ltg9ho59%22%7D%7D',
'NR_MAIN_SOURCE_RECORD': '{"locationSearch":"","locationHref":"https://xd.newrank.cn/goods/hot/salesRank","referrer":"https://www.baidu.com/","source":30000,"keyword":"seo","firstReferrer":"","firstLocation":"","sourceHref":"https://xd.newrank.cn/goods/hot/salesRank"}',
'auth_n': 'acihS1J+YcZGzUSRFhf1q09q8WdPhLV5Po6LZW6dWxedk67TpkmiwALw2uzOMhVy',
}
# HTTP request headers sent with the ranking request (passed as `headers=`
# to requests.post in the crawl loop).
#
# There is intentionally no 'Cookie' entry: the cookies travel separately
# through the `cookies` dict.  The former commented-out 'Cookie' line had
# wrapped onto a second, uncommented line, which made this literal a syntax
# error, so it was dropped.
# NOTE(review): 'n-token' looks like a per-session credential — presumably it
# has to be refreshed together with the cookies; confirm.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Connection': 'keep-alive',
    'Origin': 'https://xd.newrank.cn',
    'Referer': 'https://xd.newrank.cn/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'content-type': 'application/json',
    'gw-c-v': '10000',
    'n-token': '9116298d52d64bbfb2bafa92267f74f2',
    'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
# JSON body posted to the ranking endpoint.  The 'start' key is not set
# here; the crawl loop assigns json_data['start'] before every request.
json_data = {
    'size': 20,
    'rankDate': '2024-12-17',
    'rankType': '',
    'dateType': '0',
    # All four category levels are sent as empty strings.
    'cate': dict.fromkeys(('cate1', 'cate2', 'cate3', 'cate4'), ''),
    'source': '',
    'roomCount': '',
    'awemeCount': '',
    'nature': '',
    'sort': 'sales_money',
    'priceRange': '',
    'bigPromotionStart': '',
    'bigPromotionEnd': '',
}
# Crawl the hot-goods sales ranking page by page and flatten every returned
# item into a dict keyed by the Chinese column names.
results = []
for start in range(1, 2):  # range(1, 2) fetches page 1 only; widen it for more pages
    json_data['start'] = str(start)  # the page index is sent as a string
    time.sleep(2)  # pause before every request
    reply = requests.post(
        'https://gw.newrank.cn/api/xd/xdnphb/nr/cloud/douyin/new/rank/hotGoodsSalesRank',
        cookies=cookies,
        headers=headers,
        json=json_data,
        timeout=10,  # requests has no default timeout and could otherwise block forever
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    reply.raise_for_status()
    response = reply.json()
    try:
        res_list = response['data']['list']
    except (KeyError, TypeError) as err:
        # The payload lacks data.list — presumably the cookies/token are no
        # longer accepted; show what the server actually returned.
        raise RuntimeError(
            f'unexpected response for page {start}: {response!r}'
        ) from err
    # print(res_list)
    for data in res_list:
        results.append({
            "商品名称": data['title'],
            "商品价格": data['ana_price'],
            "所属店铺": data['goods_source'],
            "商品类别": data['productTypeV3'],
            "商品类目": data['productTypeV2'],
            "商品种类": data['productTypeV1'],
            "商品销量": data['add_sales'],
            "关联直播": data['room_count'],
            "关联达人": data['user_count'],
            "关联视频": data['aweme_count'],
        })
# Dump everything collected once all pages have been fetched.
print(results)
数据分析可视化
import pandas as pd
import numpy as np
import jieba
import time
from pyecharts.charts import Bar,Line,Map,Page,Pie
from pyecharts import options as opts
from pyecharts.globals import SymbolType
# Load the Excel workbook with the ranking data into a DataFrame.
data=pd.read_excel('/home/mw/input/douyin9762/抖音近期商品热门商品排行.xlsx')
# Preview the first 20 rows (shown when this is the last line of a notebook cell).
data.head(20)
# Print the column overview of the frame.
data.info()
# Summary statistics of the frame.
data.describe()
研究方法
商品类别分析
# Distinct values of the "商品类别" column.
data["商品类别"].unique()
# The ten most frequent "商品类别" values with their row counts.
bar_list = data["商品类别"].value_counts().head(10)
bar_list
# Bar chart: category on the x axis, number of products on the y axis.
bar = Bar()
bar.add_xaxis(bar_list.index.tolist())
bar.add_yaxis("商品个数", bar_list.values.tolist())
bar.set_global_opts(
    # The title used to read "男女个数" (male/female count), a leftover from
    # another chart; it now names what is actually plotted.
    title_opts=opts.TitleOpts(title="商品类别个数", subtitle="数量"),
    xaxis_opts=opts.AxisOpts(name="商品类别"),
    yaxis_opts=opts.AxisOpts(name="个数"),
)
# Render the chart inline in the notebook (no HTML file is written).
bar.render_notebook()
通过代码和数据可视化分析可以知道,目前是2024年底,正值冬季,卖羽绒服是比较赚钱的,羽绒服也是符合当下季节的产品;其次,热卖的商品以服装类居多。
# 商品价格分析
# NOTE(review): `data1` is never assigned in the visible code — presumably a
# (possibly filtered) copy of `data`; confirm and define it before this cell.
# Cast the price column to integers so it can be binned.
data1["商品价格"] = data1["商品价格"].astype(int)
data1.info()
# Fixed-width price bins.
data1["price_cut"] = pd.cut(data1["商品价格"], bins=[0, 500, 1000, 1500, 2000, 3000, 5000])
data1.head(20)
# List the bin intervals (was the truncated attribute `.categori`, which
# raises AttributeError).
data1.price_cut.cat.categories
# Work on a copy and replace the fixed bins with six quantile bins that
# carry readable tier labels.
con_data2 = data1.copy()
con_data2["price_cut"] = pd.qcut(
    con_data2["商品价格"],
    6,
    labels=['实惠入门', '经济优选', '亲民进阶', '价值之选', '舒适尊享', '旗舰实惠'],
)
con_data2.head(10)
# Number of products per price tier.
pie_list = con_data2.price_cut.value_counts()
pie_list
# Ring-shaped pie chart of the tier counts.
pie = Pie()
pie.add(
    # Was "访问来源" (traffic source), a leftover label from another chart.
    series_name="价格区间",
    data_pair=[
        list(z) for z in zip(
            pie_list.index.tolist(),
            pie_list.values.tolist(),
        )
    ],
    radius=["40%", "70%"],  # inner and outer radius
)
pie.set_global_opts(
    title_opts=opts.TitleOpts(title='抖音近期热卖价格状况表现'),
    legend_opts=opts.LegendOpts(orient='vertical', pos_top='15%', pos_left='2%'),
)
# Label every slice as "<name>:<percentage>%".
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie.set_colors(['#EF9050', '#3B7BA9', '#6FB27C', '#CC0033', '#003399', '#800080'])
# Render the chart inline in the notebook.
pie.render_notebook()
通过数据分析可以知道,大部分羽绒服的价格都在0-500之间的价格区间,占比将近百分之20,其次是500-1000之间;对于普通人来说,基本就是这几个价格区间了。当然,因为我拿到的数据只有500条,所以这里可能还是会有误差。
# Distinct values of the "商品类目" column.
con_data2["商品类目"].unique()
# The ten most frequent "商品类目" values with their row counts.
bar2_list = con_data2["商品类目"].value_counts().head(10)
bar2_list
# Line chart assembled as one chained pyecharts expression: x axis data,
# the single series, then the global title/axis options.
line = (
    Line()
    .add_xaxis(bar2_list.index.tolist())
    .add_yaxis("商品类目个数", bar2_list.values.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="商品类目个数", subtitle="数量"),
        xaxis_opts=opts.AxisOpts(name="商品类目"),
        yaxis_opts=opts.AxisOpts(name="商品个数"),
    )
)
# Render the chart inline in the notebook.
line.render_notebook()
在抖音小店里面,目前女装产品是最多的,说明这个季节比较适合卖衣服,其次是护肤品这类商品。
def get_cut_words(content_series, stop_words=None, my_words=None):
    """Segment a Series of texts with jieba and return the tokens worth keeping.

    All entries of ``content_series`` are joined with '。' and segmented in
    one pass with jieba's precise mode (``cut_all=False``).

    Args:
        content_series: pandas Series of strings (uses the ``.str`` accessor).
        stop_words: Optional iterable of tokens to discard.  Defaults to no
            stop words, matching the previously hard-coded empty list.
        my_words: Optional iterable of custom words registered with jieba
            before segmenting.  Defaults to the four words that were
            previously hard-coded.

    Returns:
        list[str]: tokens in their original order that are at least two
        characters long and are not stop words.
    """
    if my_words is None:
        my_words = ('中长款', '连帽长', '防滑软底', '2024新款')
    # A set keeps the per-token membership test O(1) for long stop lists.
    blocked = frozenset(stop_words) if stop_words is not None else frozenset()

    # Register the custom words so jieba keeps them as single tokens.
    for word in my_words:
        jieba.add_word(word)

    tokens = jieba.lcut(content_series.str.cat(sep='。'), cut_all=False)
    return [tok for tok in tokens if len(tok) >= 2 and tok not in blocked]
import jieba
# Segment every value of the "商品名称" column into one flat token list.
text = get_cut_words(content_series=data["商品名称"])
# Peek at the first 20 tokens (shown when this is the last line of a notebook cell).
text[:20]
!pip install stylecloud
import stylecloud
from pathlib import Path
from IPython.display import Image # 用于在jupyter lab中显示本地图片
# Draw the word cloud from the space-joined tokens and write it to a PNG.
stylecloud.gen_stylecloud(
    text=' '.join(text),
    # Rendering options.
    icon_name='fas fa-heart',
    size=578,
    collocations=False,
    # Path of the font file used for the text.
    font_path=r'/home/mw/input/simhei4936/SimHei.ttf',
    output_name='抖音商品标题关键字数据可视化.png',
)
# Display the generated image inline.
Image(filename='抖音商品标题关键字数据可视化.png')