import requests,openpyxl
from bs4 import BeautifulSoup
# Column headers written as the first row of the sheet
# (community name, unit type/area, address, phone, price).
title = [
    "小区名称",
    "居室面积",
    "地址",
    "电话",
    "房价",
]

# Create a fresh in-memory workbook and use its default sheet for the output.
workbook = openpyxl.Workbook()
sheet = workbook.active
sheet.title = "湛江房产信息"
sheet.append(title)
def _squeezed_text(node):
    """Return the text of *node* with all whitespace removed ("" if node is None)."""
    if node is None:
        return ""
    return "".join(node.text.split())


def _child_or_self(node, child_name):
    """Return the first *child_name* descendant of *node*, else *node* itself (may be None)."""
    if node is None:
        return None
    child = node.find(child_name)
    return node if child is None else child


def getMsg(page):
    """Fetch one listing page and append one row per listing to ``sheet``.

    Each ``div.nlc_details`` element on the page becomes one row with the
    columns: name, unit type/area, address, phone, price. A sub-element that
    is absent from a listing yields an empty cell instead of aborting the run.

    Args:
        page: Page number interpolated into the listing URL.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # Fetch the page.
    url = "https://zj.newhouse.fang.com/house/s/b9" + str(page) + "/"
    header = {
        "Referer": url,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36",
    }
    # Without a timeout a stalled connection would block the scrape forever.
    res = requests.get(url, headers=header, timeout=10)
    # Fail loudly on 4xx/5xx rather than parsing an error page as "no listings".
    res.raise_for_status()
    res.encoding = "gb2312"

    # Parse the page.
    soup = BeautifulSoup(res.text, "html.parser")
    for card in soup.find_all("div", class_="nlc_details"):
        # The name is the link text inside the name container; only the ends
        # are stripped so inner spacing of the name is kept.
        name_node = _child_or_self(card.find("div", class_="nlcd_name"), "a")
        name = name_node.text.strip() if name_node is not None else ""

        # The price normally sits in a <span>; fall back to the whole price
        # container when there is no <span>.
        price_node = _child_or_self(card.find("div", class_="nhouse_price"), "span")

        sheet.append([
            name,
            _squeezed_text(card.find("div", class_="house_type clearfix")),
            _squeezed_text(card.find("div", class_="address")),
            _squeezed_text(card.find("div", class_="tel")),
            _squeezed_text(price_node),
        ])
    print("第%d页下载完成" % (page))
# Scrape listing pages 1 through 12, one request per page.
for page in range(1, 13):
    # Announce the page before fetching it, so the "downloading" message
    # comes ahead of the "download finished" message printed by getMsg.
    print("第%d页正在下载" % (page))
    getMsg(page)
# …… (the rest of the script is omitted here)