爬虫案例—雪球网行情中心板块数据抓取
雪球网行情中心网址:https://xueqiu.com/hq
目标:市场一览板块、热股榜板块、新股预告板块、关注排行榜板块
import datetime
import requests
# Browser-like User-Agent sent with every request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}
# One shared Session so that cookies persist across all later requests.
session = requests.Session()
url = 'https://xueqiu.com'
# Visit the home page first so the session stores whatever cookies the site
# sets (presumably the stock.xueqiu.com endpoints expect them - verify).
# requests waits forever by default, so bound the wait with a timeout.
session.get(url, headers=headers, timeout=10)
def timestamp_to_date(timestamp_data):
    """Format a Unix timestamp given in seconds as a ``YYYY-MM-DD`` string.

    The conversion uses ``datetime.datetime.fromtimestamp`` without a time
    zone argument, so the resulting date is in the machine's local time zone.
    """
    return datetime.datetime.fromtimestamp(timestamp_data).strftime("%Y-%m-%d")
def get_four_index():
    """Print the "market overview" table for four headline indices.

    Requests a batch quote for SH000001, SZ399001, SZ399006 and SH000688 and
    prints, for each returned item, its name, current value, change with
    percentage, and market capitalisation divided by 1e12 (unit: 万亿).

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
    """
    url = 'https://stock.xueqiu.com/v5/stock/batch/quote.json?symbol=SH000001,SZ399001,SZ399006,SH000688'
    # Bound the wait and fail loudly on an error status instead of running
    # into a confusing KeyError / JSON decode error further down.
    res = session.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    items_lst = res.json()['data']['items']
    print('市 场 一 览:')
    print()
    print('板块名称\t指 数\t涨跌幅\t\t\t总市值')
    for item in items_lst:
        data_dic = item['quote']
        change = str(data_dic['chg']) + '(' + str(data_dic['percent']) + ')\t\t'
        print(data_dic['name'], data_dic['current'], change,
              f"{data_dic['market_capital'] / 1000000000000: >.2f}万亿")
    print('- ' * 50)
def get_stock(url_dict):
    """Print the hot-stock ranking for every market listed in *url_dict*.

    Args:
        url_dict: mapping of market label (shown in the printed heading) to
            the hot-stock list URL for that market.

    Raises:
        requests.HTTPError: if an endpoint answers with a 4xx/5xx status.
    """
    # Walk the mapping that was passed in rather than a module-level dict, so
    # the function works for any caller-supplied mapping (including an empty
    # one) and does not depend on a global being defined.
    for stock_type, stock_url in url_dict.items():
        res = session.get(stock_url, headers=headers, timeout=10)
        res.raise_for_status()
        # Decode the body with the encoding detected from its content.
        res.encoding = res.apparent_encoding
        stock_data = res.json()
        print(f'热股榜——{stock_type}:\n')
        print('股票代码\t\t', '股票名称\t\t\t\t\t', '股票涨跌幅')
        for stock in stock_data['data']['items']:
            print(f'{stock["code"]:8}\t', f'{stock["name"]:<25}', f'{stock["percent"]:>8}')
        print('- ' * 30)
def get_json(new_stock_url, timeout=10):
    """Fetch *new_stock_url* through the shared session and return its JSON.

    Args:
        new_stock_url: URL of a JSON endpoint.
        timeout: seconds to wait for the server before giving up; requests
            would otherwise wait indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    res = session.get(new_stock_url, headers=headers, timeout=timeout)
    # Surface HTTP failures here instead of as a KeyError in the caller.
    res.raise_for_status()
    return res.json()
def hk_new_stokc(hk_new_stock_url):
    """Print the Hong Kong new-listing table fetched from *hk_new_stock_url*.

    Each row shows the symbol, name, listing date and the lower/upper bound of
    the offer price. The misspelled function name is kept because the script's
    entry point calls it under this name.
    """
    payload = get_json(hk_new_stock_url)
    print('- ' * 40)
    print('港股')
    print('新股代码\t', '新股名称\t', '上市日期\t', '招股价下限\t', '招股价上限')
    for ipo in payload['data']['items']:
        # list_date is divided by 1000 before conversion (ms -> s).
        listed_ts = ipo['list_date'] / 1000
        row = (
            ipo['symbol'], '\t',
            ipo['name'], '\t',
            timestamp_to_date(listed_ts), '\t',
            ipo['issprice_min'], '\t',
            ipo['issprice_max'],
        )
        print(*row)
def hs_new_stock(hs_new_stock_url):
    """Print the Shanghai/Shenzhen new-issue table fetched from *hs_new_stock_url*.

    Prints the section heading, the issue count reported by the endpoint, and
    one row per item: symbol, name, subscription code, issue quantity,
    subscription cap, subscription date and lottery-result publication date.
    """
    payload = get_json(hs_new_stock_url)
    print('新 股 预 告')
    print('- ' * 40)
    print('沪深')
    print('新股发行数量:', payload['data']['count'])
    print('新股代码\t', '新股名称\t', '申购代码\t', '预计发行量(万股)\t', '申购上限(万股)\t', '申购日期\t', '中签号公布日')
    for ipo in payload['data']['items']:
        # Both dates are divided by 1000 before conversion (ms -> s).
        subscribe_ts = ipo['onl_distr_date'] / 1000
        lottery_ts = ipo['onl_lotwiner_stpub_date'] / 1000
        row = (
            ipo['symbol'], ipo['name'], '\t',
            ipo['onl_subcode'], '\t',
            ipo['actissqty'], '\t\t\t',
            ipo['onl_sub_maxqty'], '\t\t',
            timestamp_to_date(subscribe_ts), '\t',
            timestamp_to_date(lottery_ts),
        )
        print(*row)
def un_new_stock(un_new_stock_url):
    """Print the US new-listing table fetched from *un_new_stock_url*.

    Prints the issue count reported by the endpoint, then one row per item:
    symbol, name (truncated to 8 characters), listing date, share count
    divided by 10000 (suffix 万), and the lower/upper offer price. A missing
    (falsy) share count or price is shown as ``-``.
    """
    new_stock = get_json(un_new_stock_url)
    print('- ' * 50)
    print('美股')
    print('新股发行数量:', new_stock['data']['count'])
    print('新股代码\t', '新股名称\t', '上市日期\t', '\t股本', '\t招股价下限\t', '招股价上限')
    for new_stock_item in new_stock['data']['items']:
        # list_date is divided by 1000 before conversion (ms -> s).
        list_timestamp = new_stock_item['list_date'] / 1000

        shares = new_stock_item['shares']
        if shares:
            new_shares = '\t' + str(shares / 10000) + '万'
        else:
            new_shares = '\t\t-\t'

        # Decide each price column on its own so that a row with only one of
        # the two prices never leaves the other column unassigned or showing
        # the previous row's value.
        price_min = new_stock_item['issprice_min']
        price_max = new_stock_item['issprice_max']
        new_min = '\t' + str(price_min) + '\t' if price_min else '\t-\t'
        new_max = '\t' + str(price_max) + '\t' if price_max else '\t-'

        print(new_stock_item['symbol'], '\t', new_stock_item['name'][:8], '\t', timestamp_to_date(list_timestamp),
              new_shares, new_min, new_max)
def get_new_add_stock(new_add_url):
    """Print the "new this week" follower ranking fetched from *new_add_url*.

    Each row shows the stock name, its current price and the ``follow7d``
    value converted to an integer.
    """
    payload = get_json(new_add_url)
    print('- ' * 50)
    print('关注排行榜——本周新增')
    ranking = payload['data']['list']
    print('股票名称\t\t股 价\t\t关 注')
    for entry in ranking:
        name, price = entry['name'], entry['current']
        followers = int(entry['follow7d'])
        print(f"{name}\t\t{price}\t\t{followers:<}")
def get_hot_stock(new_hot_url):
    """Print the "most followed" ranking fetched from *new_hot_url*.

    Each row shows the stock name, its current price and the ``follow`` value
    converted to an integer.
    """
    payload = get_json(new_hot_url)
    print('- ' * 50)
    print('关注排行榜——最热门')
    ranking = payload['data']['list']
    print('股票名称\t\t股 价\t\t关 注')
    for entry in ranking:
        name, price = entry['name'], entry['current']
        followers = int(entry['follow'])
        print(f"{name}\t\t{price}\t\t{followers:<}")
if __name__ == '__main__':
    # --- Endpoints, grouped up front -------------------------------------
    # Hot-stock lists, keyed by the market label used in the printed heading.
    url_lst_dict = {
        '沪深': 'https://stock.xueqiu.com/v5/stock/hot_stock/list.json?page=1&size=9&_type=12&type=12',
        '港股': 'https://stock.xueqiu.com/v5/stock/hot_stock/list.json?page=1&size=9&_type=13&type=13',
        '美股': 'https://stock.xueqiu.com/v5/stock/hot_stock/list.json?page=1&size=9&_type=11&type=11',
    }
    # New-issue lists: Shanghai/Shenzhen, Hong Kong, US.
    hs_new_stock_url = 'https://stock.xueqiu.com/v5/stock/preipo/cn/query.json?type=subscribe&order_by=onl_subbeg_date&order=asc&source=new_subscribe&page=1&size=10'
    hk_new_stock_url = 'https://stock.xueqiu.com/v5/stock/preipo/hk/query.json?order=desc&order_by=list_date&type=unlisted&page=1&size=10'
    un_new_stock_url = 'https://stock.xueqiu.com/v5/stock/preipo/us/list.json?order=desc&order_by=list_date&type=unlisted&page=1&size=10'
    # Follower rankings: new this week, and most followed overall.
    new_add_stock_url = 'https://stock.xueqiu.com/v5/stock/screener/screen.json?page=1&only_count=0&size=10&category=CN&order_by=follow7d&order=desc'
    hot_stock_url = 'https://stock.xueqiu.com/v5/stock/screener/screen.json?page=1&only_count=0&size=10&category=CN&order_by=follow&order=desc'

    # --- Report sections, printed in page order --------------------------
    get_four_index()
    get_stock(url_lst_dict)
    print()
    hs_new_stock(hs_new_stock_url)
    hk_new_stokc(hk_new_stock_url)
    un_new_stock(un_new_stock_url)
    get_new_add_stock(new_add_stock_url)
    get_hot_stock(hot_stock_url)
运行结果如下: