from selenium import webdriver
from pyquery import PyQuery as pq
import csv
import time
# Listing-page URL prefix; the page number is appended directly after "p".
# Alternative city used previously:
#   "https://beijing.anjuke.com/sale/p"  -> test2.csv (60 pages)
URL = "https://sjz.anjuke.com/sale/p"  # -> test3.csv (40 pages)

OUTPUT_FILE = 'test3.csv'
# NOTE(review): pages are requested as p0..p39, exactly as before. If the
# site's pagination is 1-based this may re-fetch the first page and miss the
# last one -- confirm and set FIRST_PAGE = 1 if so.
FIRST_PAGE = 0
PAGE_COUNT = 40
PAGE_DELAY_SECONDS = 5

# CSV header row (runtime data, kept verbatim): title, layout, area,
# orientation, floor, build year, district, community address,
# price per square metre, total price.
HEADER = ['标题', '户型', '面积', '朝向', '楼层', '修建时间', '区域', '小区地址', '每平米价格', '价格']

INFO_FIELD_COUNT = 5  # layout, area, orientation, floor, build year
ADDR_FIELD_COUNT = 2  # district, community address


def build_row(name, info_lines, addr_lines, avg_price, total_price):
    """Assemble one CSV row whose columns always line up with HEADER.

    Args:
        name: Listing title.
        info_lines: Text lines of the basic-info block. Fewer than five
            lines are padded with '' at the end (the common case is a
            missing build year); any lines beyond the fifth are folded
            into the fifth column so nothing is dropped.
        addr_lines: Text lines of the address block. Only the first two
            are used; missing ones become ''.
        avg_price: Price-per-square-metre text.
        total_price: Total price text.

    Returns:
        A list of exactly ``len(HEADER)`` strings.
    """
    info = list(info_lines[:INFO_FIELD_COUNT])
    if len(info_lines) > INFO_FIELD_COUNT:
        # Keep overflow text instead of letting it shift the price columns.
        info[-1] = ' '.join(info_lines[INFO_FIELD_COUNT - 1:])
    info += [''] * (INFO_FIELD_COUNT - len(info))

    addr = (list(addr_lines) + [''] * ADDR_FIELD_COUNT)[:ADDR_FIELD_COUNT]
    return [name, *info, *addr, avg_price, total_price]


def parse_listing(item):
    """Extract one CSV row from a single ``.property`` card.

    Args:
        item: PyQuery object wrapping one listing card.

    Returns:
        The row built by ``build_row``, or ``None`` when the card lacks the
        title, either info block, or either price element.
    """
    def texts(selector):
        return [node.text() for node in item.find(selector).items()]

    titles = texts('.property-content-title-name')
    # First block: basic details; second block: address lines.
    info_blocks = texts('.property-content-info')
    avg_prices = texts('.property-price-average')
    total_prices = texts('.property-price-total')

    if not (titles and len(info_blocks) >= 2 and avg_prices and total_prices):
        return None

    return build_row(
        titles[0],
        info_blocks[0].split('\n'),
        info_blocks[1].split('\n'),
        avg_prices[0],
        total_prices[0],
    )


def scrape_page(bw, page_url):
    """Load one result page in the browser and return its listing rows.

    Args:
        bw: Selenium WebDriver instance.
        page_url: Full URL of the result page.

    Returns:
        A list of rows (see ``build_row``); incomplete cards are skipped.
    """
    print(page_url)
    bw.get(page_url)
    # Wait *before* taking the snapshot so late-rendered content has a chance
    # to appear; the pause also spaces out consecutive requests.
    time.sleep(PAGE_DELAY_SECONDS)
    doc = pq(bw.page_source)

    rows = []
    for item in doc('.property').items():  # iterate over the listing cards
        row = parse_listing(item)
        if row is None:
            print('skipped incomplete listing on', page_url)
            continue
        rows.append(row)
    return rows


def main():
    """Scrape PAGE_COUNT result pages and write all rows to OUTPUT_FILE.

    Rows are written page by page, so an error part-way through keeps what
    was already collected. The browser is always shut down, even on error.
    """
    bw = webdriver.Chrome()
    try:
        # utf-8-sig: explicit encoding for the Chinese text, independent of
        # the platform default; the BOM helps spreadsheet tools detect UTF-8.
        with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8-sig') as f:
            writer = csv.writer(f)
            writer.writerow(HEADER)
            for page in range(FIRST_PAGE, FIRST_PAGE + PAGE_COUNT):
                writer.writerows(scrape_page(bw, URL + str(page)))
    finally:
        # quit() ends the whole driver session, not just the current window.
        bw.quit()


if __name__ == "__main__":
    main()