python爬虫爬取房源

获取mysql连接

# Open the MySQL connection once, before the crawl loop.
conn = pymysql.connect(host="localhost", user="uroot", password="000000",
                       port=3306, database='BeiKei')
cursor = conn.cursor()
# Parameterized INSERT -- %s placeholders let the driver escape values safely.
sql = 'INSERT INTO zufang(housetitle,housetype,price) values(%s,%s,%s)'

for page in range(1, 3):
    # BUG in the original: it parsed bw.page_source without ever navigating,
    # so every iteration re-read the same page.  Load the page first.
    bw.get(f'https://sz.zu.ke.com/zufang/futianqu/pg{page}/')
    doc = pq(bw.page_source)

    # One element per listing -> three parallel lists.  The original called
    # doc(sel).text(), which concatenates ALL matches into a single string
    # and inserts one garbage row per page instead of one row per listing.
    housetitles = [pq(el).text() for el in doc('.twoline')]                    # listing title
    housetypes = [pq(el).text() for el in doc('.content__list--item--des')]    # layout / description
    prices = [pq(el).text() for el in doc('.content__list--item-price')]       # price

    try:
        # zip() pairs the lists row-by-row; executemany inserts the whole page.
        cursor.executemany(sql, list(zip(housetitles, housetypes, prices)))
        conn.commit()
    except pymysql.MySQLError:
        # Narrow exception instead of a bare `except:`; roll back just this
        # page.  The original also closed the connection here, which killed
        # every later iteration -- keep it open until the loop finishes.
        conn.rollback()

# Close once, after ALL pages have been processed.
conn.close()
怎么写这个循环才能将数据写入数据库

import time

from selenium import webdriver
from pyquery import PyQuery as pq
import csv

URL = "https://beijing.anjuke.com/sale/p"
bw = webdriver.Chrome()
# Header row goes in first; listing rows are appended below and written out
# in one pass after the crawl (writing inside the loop would truncate the file).
alldata = [['标题', '价格', '平米价格', '户型', '面积', '朝向', '楼层', '修建时间', '所处小区', '区域地址']]
try:
    # NOTE(review): range(2) requests pages p0 and p1; Anjuke pagination
    # normally starts at p1 -- confirm whether range(1, 3) was intended.
    for page in range(2):
        page_url = URL + str(page)
        print(page_url)
        bw.get(page_url)
        time.sleep(2)  # let the JS-rendered listing cards appear before parsing
        doc = pq(bw.page_source)

        for item in doc('.property').items():  # one .property node per listing
            pdoc = pq(item.html())
            titles = list(pdoc('.property-content-title-name').items())
            totals = list(pdoc('.property-price-total').items())
            avgs = list(pdoc('.property-price-average').items())
            # A listing carries 2-3 .property-content-info nodes: [0] is the
            # basic info (layout/area/orientation/floor/year), [1] the address,
            # so we must index rather than take them all.
            infos = list(pdoc('.property-content-info').items())
            if not titles or not totals or not avgs or len(infos) < 2:
                # Malformed / ad card -- skip instead of crashing on IndexError.
                continue

            data = infos[0].text().split('\n')   # basic info fields
            addr = infos[1].text().split('\n')   # [community, district/address]
            if len(data) == 4:
                data.append('')  # build-year missing -> pad so columns align

            row = [titles[0].text(), totals[0].text(), avgs[0].text()]
            row.extend(data)
            row.append(addr[0])
            row.append(addr[1] if len(addr) > 1 else '')
            alldata.append(row)
finally:
    # quit() terminates the chromedriver process; the original's bw.close()
    # only closes the window and leaks the driver when an exception escapes.
    bw.quit()


 
# Write the CSV once at the end -- opening the file with mode 'w' inside the
# scrape loop would truncate it on every page.
def write_csv(rows, path='test13.csv'):
    """Write *rows* (a list of lists) to *path* as CSV.

    utf-8-sig prepends a BOM so Excel on Windows detects the encoding of the
    Chinese header row; the original relied on the locale default encoding,
    which raises UnicodeEncodeError on non-UTF-8 systems.
    """
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        csv.writer(f).writerows(rows)


if __name__ == '__main__':
    write_csv(alldata)

参考代码:

# Reference implementation: plain HTTP with requests + pyquery, no browser.
sess = requests.session()
urls = [f'https://sz.zu.ke.com/zufang/futianqu/pg{i}/' for i in range(1, 3)]
dfs = pd.DataFrame()
for url in urls:
    # A browser User-Agent is required or the site serves a bot page.
    bw = sess.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39'})
    doc = pq(bw.text)
    # One entry per listing: title, layout, price (parallel lists).
    housetitle = [pq(x).text() for x in doc('.twoline')]
    housetype = [pq(x).text() for x in doc('.content__list--item--des')]
    price = [pq(x).text() for x in doc('.content__list--item-price')]
    df = pd.DataFrame({'housetitle': housetitle, 'housetype': housetype, 'price': price})
    dfs = pd.concat([dfs, df], ignore_index=True)
    time.sleep(1)  # polite crawl delay between pages

# BUG fix: DataFrame.to_sql does not accept a raw pymysql connection -- it
# supports only SQLAlchemy connectables or a sqlite3 connection.  Insert via
# the DBAPI cursor instead, reusing the existing `conn`.
with conn.cursor() as cur:
    cur.executemany(
        'INSERT INTO zufang(housetitle,housetype,price) VALUES (%s,%s,%s)',
        list(dfs.itertuples(index=False, name=None)),
    )
conn.commit()

您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!
PS:问答VIP年卡 【限时加赠:IT技术图书免费领】,了解详情>>> https://vip.csdn.net/askvip?utm_source=1146287632