本案例用到列表、函数、字符串等知识点，知识点参考链接如下：
python基础知识(一)&输入输出函数
python基础知识(二)&基本命令
python基础知识(三)&常用的内置函数
python基础知识(四)&字符串常用的方法
python基础知识(六)&字典
python基础知识(七)&列表
python基础知识(八)&open函数
python基础知识(九)&函数
完整代码如下:
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv
def qingqiu(url):
    """Download ``url`` with an HTTP GET and return the response body as text.

    The request headers are taken from the module-level name ``headers``,
    which must already be defined when this function is called.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body decoded to ``str`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the server does not answer within the timeout.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would freeze the whole crawl on a single bad page.
    response = requests.get(url, headers=headers, timeout=10)
    return response.text
def paser_html(html):
    """Parse one listing page and pull out the per-listing fields.

    Five groups of elements are selected from the page:

    * ``span[class=""]``         -> text converted to ``float``
    * ``div[class="unitPrice"]`` -> text with the ``"元/平"`` suffix removed
    * ``a[data-el="region"]``    -> text as-is
    * ``div[class="houseInfo"]`` -> text split on ``'|'``; the first five
      pieces are used, the second one converted to ``float`` after removing
      ``"平米"``

    Args:
        html: Page markup as a string.

    Returns:
        A ``zip`` iterator of 8-tuples
        ``(loupan, danjia, price, huxing, size, fangxiang, zhuangxiu, louceng)``.
        Because it is a ``zip``, it stops at the shortest of the underlying
        lists and can be consumed only once.

    Raises:
        ValueError: If a text that is converted with ``float`` is not numeric.
        IndexError: If a ``houseInfo`` text has fewer than five
            ``'|'``-separated pieces.
    """
    soup = BeautifulSoup(html, "lxml")

    price_all = [
        float(tag.get_text()) for tag in soup.select('span[class=""]')
    ]
    danjia_all = [
        tag.get_text().replace("元/平", "")
        for tag in soup.select('div[class="unitPrice"]')
    ]
    loupan_all = [
        tag.get_text() for tag in soup.select('a[data-el="region"]')
    ]

    # Named ``info_rows`` rather than ``all`` so the builtin is not shadowed.
    info_rows = [
        tag.get_text().split('|')
        for tag in soup.select('div[class="houseInfo"]')
    ]

    huxing = []
    size = []
    fangxiang = []
    zhuangxiu = []
    louceng = []
    # Filled row by row so that a malformed row fails at the same point
    # regardless of which column is broken.
    for fields in info_rows:
        huxing.append(fields[0])
        size.append(float(fields[1].replace("平米", "")))
        fangxiang.append(fields[2])
        zhuangxiu.append(fields[3])
        louceng.append(fields[4])

    result = zip(
        loupan_all, danjia_all, price_all,
        huxing, size, fangxiang, zhuangxiu, louceng,
    )
    return result
def save(result):
    """Append the rows in ``result`` to ``长沙二手房.csv``.

    The file is opened in append mode with UTF-8 encoding, so existing
    content is kept and the new rows are added after it.

    Args:
        result: Iterable of rows; each row is an iterable of field values.
    """
    with open('长沙二手房.csv', "a", newline='', encoding='utf-8') as out_file:
        csv.writer(out_file).writerows(result)
if __name__ == '__main__':
    # Script entry point: ask for a page range, then fetch, parse and append
    # each listing page to the CSV file.
    import os

    csv_path = '长沙二手房.csv'
    T_head = ['楼盘', '单价', '总价', '户型', '面积', '方向', '装修', '楼层']

    # qingqiu() reads ``headers`` as a module-level name. It is the same for
    # every page, so it is defined once here instead of inside the loop.
    headers = {'User-Agent': 'Mozilla/5.0'}

    # The file is opened in append mode, so writing the header on every run
    # would leave extra header rows in the middle of the data. Write it only
    # when the file does not exist yet or is still empty.
    if not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0:
        with open(csv_path, "a", newline='', encoding='utf-8') as f:
            wr = csv.writer(f)
            wr.writerow(T_head)

    start_ye = int(input("请输入起始页:"))
    end_ye = int(input("请输入终止页:"))
    # ``+ 1`` makes the end page inclusive: range() excludes its stop value,
    # so without it the page entered as the end page would never be fetched.
    for y in range(start_ye, end_ye + 1):
        url = 'https://cs.lianjia.com/ershoufang/pg{}/'.format(y)
        html = qingqiu(url)
        result = paser_html(html)
        save(result)
        print("爬取第{}页".format(y))
运行后保存的数据如下表所示: