# coding:utf-8
"""Scrape listing titles from the 58.com second-hand housing page.

The script downloads one listing page, extracts the title of every listing
found under ``<section class="list">`` and writes the titles, one per line,
to ``./58二手房.txt`` (also echoing each title to stdout).
"""
from lxml import etree
import requests


def fetch_page(url, headers, timeout=10):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        headers: HTTP request headers to send.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def extract_titles(page_text):
    """Return the listing titles found in the HTML document *page_text*.

    Listing containers that have no title node are skipped rather than
    raising ``IndexError``.
    """
    if not page_text or not page_text.strip():
        # Nothing to parse; avoid handing an empty document to lxml.
        return []

    # Parse the downloaded markup itself (not a literal string).
    tree = etree.HTML(page_text, etree.HTMLParser())
    if tree is None:
        return []

    titles = []
    for listing in tree.xpath('//section[@class="list"]/div'):
        matches = listing.xpath('./a/div[2]/div/div/h3/text()')
        if matches:
            titles.append(matches[0])
    return titles


def main():
    """Fetch the listing page and save every listing title to a text file."""
    # Fetch the page source.
    url = 'https://hrb.58.com/nangang/ershoufang/'
    headers = {
        # The header name must be exactly "User-Agent" for the site to see it.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36'
    }
    page_text = fetch_page(url, headers)

    # Parse the data.
    titles = extract_titles(page_text)

    # Write the results; the context manager guarantees the file is closed.
    with open('./58二手房.txt', 'w', encoding='utf-8') as fp:
        for title in titles:
            print(title)
            fp.write(title + '\n')


if __name__ == '__main__':
    main()
# Add scraping of web page information: