1. 背景信息
爬取安居客二手房源信息
URL地址:https://wuhan.anjuke.com/sale/?from=navigation
2. 代码实现
import requests
from lxml import etree
if __name__ == '__main__':
    # Scrape the listing titles from the Anjuke (Wuhan) second-hand housing
    # page, print each title, and save them to 58.txt, one title per line.

    # 1. Target URL.
    url = "https://wuhan.anjuke.com/sale/?from=navigation"

    # 2. Send a browser-like User-Agent header with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    }

    # 3. Fetch the page. An explicit timeout keeps a stalled connection from
    #    blocking the script forever (requests applies no timeout by default).
    page_text = requests.get(url=url, headers=headers, timeout=10).text

    # 4. Parse the HTML into an element tree.
    tree = etree.HTML(page_text)

    # 5. Collect the <div> children of <section class="list">.
    div_list = tree.xpath('//section[@class="list"]/div')

    # The context manager guarantees the output file is flushed and closed,
    # even if an exception is raised while iterating.
    with open('58.txt', 'w', encoding='utf-8') as fp:
        for div in div_list:
            # XPath relative to the current <div>; returns a list of text nodes.
            titles = div.xpath('./a/div[2]/div[1]/div[1]/h3/text()')
            if not titles:
                # This <div> has no title at the expected path; skip it
                # instead of crashing with IndexError on titles[0].
                continue
            title = titles[0]
            print(title)
            fp.write(title + '\n')