# Store every rental listing found on the scraped pages in the MySQL table
# `zufang`, one row per listing (housetitle, housetype, price).
conn = pymysql.connect(
    host="localhost",
    user="uroot",
    password="000000",
    port=3306,
    database='BeiKei',
)
sql = 'INSERT INTO zufang(housetitle,housetype,price) values(%s,%s,%s)'
try:
    with conn.cursor() as cursor:
        for x in range(1, 3):
            # NOTE(review): nothing in this snippet navigates `bw` to page x,
            # so each pass parses whatever page the browser currently shows.
            # Load the page for `x` before this line -- TODO confirm the URL.
            doc = pq(bw.page_source)

            # Calling .text() on the whole selection joins ALL matches into a
            # single string; iterate with .items() to get one value per listing.
            titles = [node.text() for node in doc('.twoline').items()]
            types = [node.text() for node in doc('.content__list--item--des').items()]
            prices = [node.text() for node in doc('.content__list--item-price').items()]

            # zip() stops at the shortest list, so a listing that lacks one of
            # the three fields cannot produce a partially filled row.
            rows = list(zip(titles, types, prices))
            if not rows:
                continue

            try:
                cursor.executemany(sql, rows)
                conn.commit()
            except pymysql.MySQLError as exc:
                # Undo the failed page and report why, instead of hiding the
                # error behind a bare `except`.
                conn.rollback()
                print(f"insert failed for page {x}: {exc}")
finally:
    # Close once, after all pages, even if scraping or inserting raised.
    conn.close()
怎么写这个循环,才能将数据写入数据库?
import time
from selenium import webdriver
from pyquery import PyQuery as pq
import csv
# Scrape second-hand housing listings from anjuke.com with a Selenium-driven
# Chrome and write them to test13.csv, one row per listing.
URL = "https://beijing.anjuke.com/sale/p"

# First row is the CSV header; every scraped listing is appended after it so
# the file can be written once at the end.
alldata = [['标题','价格','平米价格','户型', '面积', '朝向', '楼层', '修建时间','所处小区','区域地址']]

bw = webdriver.Chrome()
try:
    # NOTE(review): range(2) requests ".../p0" and ".../p1"; confirm that the
    # site really numbers pages from 0 (range(1, 3) may be what was intended).
    for x in range(2):
        new_url = URL + str(x)
        print(new_url)
        # Open the page and parse the rendered HTML.
        bw.get(new_url)
        doc = pq(bw.page_source)

        # Each `.property` element is one listing card.
        for item in doc('.property').items():
            # Search inside the card directly; re-parsing item.html() is
            # unnecessary. .eq(0) takes the first match and yields '' when
            # the element is absent instead of raising IndexError.
            name = item.find('.property-content-title-name').eq(0).text()
            price = item.find('.property-price-total').eq(0).text()
            avgprice = item.find('.property-price-average').eq(0).text()

            # A card carries 2-3 `.property-content-info` sections: the first
            # holds the basic facts, the second the address. Take them by
            # position rather than reading all of them at once.
            infos = item.find('.property-content-info')
            if len(infos) < 2:
                # Card without an address section; skip it rather than crash.
                continue
            data = infos.eq(0).text().split('\n')
            addr = infos.eq(1).text().split('\n')

            # Four basic fields means the last one is missing; pad it so the
            # columns stay aligned with the header.
            if len(data) == 4:
                data.append('')
            # Guarantee both address columns exist.
            addr += [''] * (2 - len(addr))

            alldata.append([name, price, avgprice, *data, addr[0], addr[1]])
finally:
    # quit() ends the whole driver session (close() only closes the window),
    # and `finally` releases the browser even when scraping fails.
    bw.quit()

# Write everything in one go; opening the file in 'w' mode inside the loop
# would overwrite earlier rows. utf-8-sig keeps the Chinese text intact on any
# platform and lets Excel detect the encoding.
with open('test13.csv', 'w', newline='', encoding='utf-8-sig') as f:
    csv.writer(f).writerows(alldata)
参考代码:
# Download the rental listing pages with requests, collect title / layout /
# price for every listing, and append them all to the `zufang` table.
sess = requests.session()
urls = [f'https://sz.zu.ke.com/zufang/futianqu/pg{i}/' for i in range(1,3)]
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39'}

frames = []
for url in urls:
    # A timeout keeps a stalled connection from hanging the script, and
    # raise_for_status() stops on an HTTP error instead of parsing an error page.
    resp = sess.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    doc = pq(resp.text)

    # Listing title
    housetitle = [node.text() for node in doc('.twoline').items()]
    # Layout / description
    housetype = [node.text() for node in doc('.content__list--item--des').items()]
    # Price
    price = [node.text() for node in doc('.content__list--item-price').items()]

    frames.append(pd.DataFrame({'housetitle':housetitle,'housetype':housetype,'price':price}))
    # Pause between requests to avoid hammering the site.
    time.sleep(1)

# Concatenate once at the end rather than growing a DataFrame inside the loop.
dfs = pd.concat(frames, ignore_index=True)

# if_exists='append' adds rows to an existing table (the default 'fail' raises
# when `zufang` already exists); index=False keeps the DataFrame index from
# being written as an extra column.
# NOTE(review): `conn` is defined outside this snippet. pandas documents
# to_sql for SQLAlchemy connectables and sqlite3 connections -- confirm `conn`
# is one of those and not a raw pymysql connection.
dfs.to_sql('zufang', conn, if_exists='append', index=False)
您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!