要获取每个地区的 KFC 门店信息，首先需要知道中国一共有哪些地区（省份及其下属城市）。
中国所有城市名称获取
import requests
from lxml import etree
# Scrape the KFC store-locator page for the province names and the city names
# listed under each province, then persist them to two text files:
#   ./省份.txt - one province name per line
#   ./城市.txt - one "province-city" entry per line
url1 = 'http://www.kfc.com.cn/kfccda/storelist/index.aspx'  # store-locator page
# XPath of the <li> items in the city picker. The lookups below assume the
# i-th <li> carries the i-th province name (<strong>) and its cities (<a>).
province_xpath = '//*[@id="container"]/div[1]/div[2]/div/div[1]/div[1]/div/div[3]/ul/li'

# A timeout keeps the script from hanging forever on an unresponsive server.
res1 = requests.get(url1, timeout=10).text
tre = etree.HTML(res1)
sheng = tre.xpath(f'{province_xpath}/strong/text()')

city = []
# XPath positions are 1-based, so <li> number i pairs with sheng[i - 1].
for i, province in enumerate(sheng, start=1):
    shi = tre.xpath(f'{province_xpath}[{i}]/div/a/text()')
    city.extend(f'{province}-{s}' for s in shi)
print(city)

# The files are read back with encoding="GBK" later on, so write them as GBK
# explicitly instead of relying on the platform default. Each file is opened
# once in "w" mode, which also replaces any content from a previous run.
with open('./城市.txt', 'w', encoding='GBK') as fp:
    fp.writelines(f'{entry}\n' for entry in city)
with open('./省份.txt', 'w', encoding='GBK') as fp:
    fp.writelines(f'{province}\n' for province in sheng)
现在我们已经有了每个城市的名字，只需要将它们依次填入请求接口的 data 参数中，即可获取每个地区的 KFC 门店基本信息。
每个地区门店基本信息获取
import shutil
import time
import requests
import os
from multiprocessing.dummy import Pool
def RemoveDir():
    """Recreate ``./city`` as a fresh tree with one sub-directory per province.

    Any existing ``./city`` directory is removed first. Province names are
    read from ``./省份.txt`` (decoded as GBK, one name per line); blank lines
    are skipped and repeated names are tolerated.
    """
    # ignore_errors keeps the removal best-effort (e.g. the directory does not
    # exist on the first run) without a bare ``except`` that would also
    # swallow KeyboardInterrupt and SystemExit.
    shutil.rmtree('./city', ignore_errors=True)
    os.makedirs('./city', exist_ok=True)

    # Read through a context manager so the file handle is always closed.
    with open('./省份.txt', 'r', encoding='GBK') as province_file:
        provinces = [line.strip('\n') for line in province_file]

    for province in provinces:
        if not province:
            # An empty name would resolve to ./city/ itself.
            continue
        os.makedirs(f'./city/{province}', exist_ok=True)
def zhu(name):
    """Fetch the stores of one city and append them to that city's text file.

    Args:
        name: A ``"province-city"`` entry, as stored in ``./城市.txt``.

    The city name is posted to the module-level ``url`` endpoint. When the
    response reports a non-zero ``rowcount``, one
    ``storeName%addressDetail%pro`` line per distinct ``addressDetail`` is
    appended to ``./city/<province>/<city>.txt``, followed by the number of
    lines written. Entries without a ``-`` separator are reported and skipped.
    """
    # Split on the first '-' only, so a hyphen inside the city part is kept.
    province, separator, city_name = str(name).partition('-')
    if not separator:
        print(f'skip malformed entry: {name!r}')
        return

    data = {
        'cname': f'{city_name}',
        'pid': '',
        'pageIndex': '1',
        'pageSize': '900',
    }
    # POST the form data and decode the JSON reply; the timeout prevents a
    # stalled connection from blocking a worker thread forever.
    res = requests.post(url, data=data, timeout=10).json()
    print(res)

    # A rowcount of 0 means the city has no stores, so nothing is written.
    if str(res['Table'][0]['rowcount']) != '0':
        seen_addresses = set()  # addresses already written (de-duplication key)
        # NOTE(review): the file uses the platform default encoding, as
        # before; confirm what downstream readers expect before pinning one.
        with open(f'./city/{province}/{city_name}.txt', 'a') as fp:
            for store in res['Table1']:
                address = str(store['addressDetail'])
                if address in seen_addresses:
                    continue  # same address already recorded
                seen_addresses.add(address)
                record = str(store['storeName']) + '%' + address + '%' + str(store['pro'])
                print(record)
                fp.write(record + '\n')
            # Final line: number of distinct stores written for this city.
            fp.write(str(len(seen_addresses)))

    # Pause between requests to throttle the load on the server.
    time.sleep(0.5)
# Driver: rebuild the output tree, load every "province-city" entry and fetch
# the stores of each city with a small thread pool.
RemoveDir()
number = []  # not used in this script; kept so the module-level name still exists
url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'

# Read the city list (decoded as GBK) through a context manager so the handle
# is closed, and drop blank lines so they are not dispatched as tasks.
with open('./城市.txt', 'r', encoding='GBK') as city_file:
    namelist = [line.strip('\n') for line in city_file if line.strip()]
print(namelist)

# Three worker threads; map() blocks until zhu has been called for every
# entry, and the context manager shuts the pool down afterwards.
with Pool(3) as pool:
    pool.map(zhu, namelist)