爬取链家上多页二手房数据

希望使用python爬取链家上多页二手房数据,但是最终结果是循环写入第一页的数据,求解答

import requests,csv,parsel
url = 'https://tj.lianjia.com/ershoufang/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}

# NOTE(review): the page is fetched exactly ONCE, here, before the page loop —
# so `lis` below always holds page 1's listings and every iteration of the
# loop re-writes the same first-page data. The request/parse must move inside
# the `for page ...` loop for the pg{page} URL to have any effect.
response = requests.get(url=url, headers=headers)
print(response.text)
selector = parsel.Selector(response.text)
lis = selector.css('.sellListContent li')

for page in range(1, 101):
    # `url` is rebuilt for each page but never requested again (see note above).
    url = f'https://tj.lianjia.com/ershoufang/pg{page}/'
    print(url)
    for li in lis:
        # Listing title
        title = li.css('.title a::text').get()
        if title:
        # Location links: index 0 = community, index 1 = district
            positionInfo = li.css('.positionInfo a::text').getall()
        # Community name
            community = positionInfo[0]
        # District / area name
            address = positionInfo[1]
        # Basic house description text
            houseInfo = li.css('.houseInfo::text').get()
        # Total price (the '万' unit suffix is appended)
            Price = li.css('.totalPrice span::text').get() + '万'
        # Unit price with the leading '单价' label stripped
            unitPrice = li.css('.unitPrice span::text').get().replace('单价', '')
        # Follow-count / publish-time info
            followInfo = li.css('.followInfo::text').get()
            dit = {
                '标题': title,
                '小区': community,
                '地名': address,
                '房子基本信息': houseInfo,
                '房价': Price,
                '单价': unitPrice,
                '发布信息': followInfo,
            }
            print(dit)
        # NOTE(review): these four lines sit OUTSIDE the `if title:` block
        # (wrong indentation), so items without a title re-write the previous
        # `dit`; the file is also re-opened per item without ever being
        # closed, and the header row is re-written before every data row.
        f = open('二手房数据.csv', mode='a', encoding='utf-8', newline='')
        csv_writer = csv.DictWriter(f, fieldnames=['标题', '小区', '地名', '房子基本信息', '房价', '单价', '发布信息'])
        # Header written once per item — a defect, kept as-is in the question
        csv_writer.writeheader()
        csv_writer.writerow(dit)


将缩进格式修改一下,同时把 requests.get 请求和解析移动到翻页循环内部即可——请求若留在循环外,解析到的 `lis` 始终是第一页的数据,循环一百次写入的也就都是第一页。
代码成功运行完毕,数据保存结果如下

img

img

代码修改后如下

import requests, csv, parsel

# Scrape 100 pages of second-hand-house listings from Lianjia (Tianjin) into
# a CSV file.
#
# BUG FIX vs. the original: the request was sent ONCE, before the page loop,
# so `lis` always held page 1's listings and all 100 iterations wrote the
# same first-page rows. The request + parse now happens inside the loop,
# once per page. The CSV file is also opened once (the original re-opened it
# for every row and never closed it, leaking handles) and the header row is
# written once (the original interleaved a header before every data row).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}
# CSV column order, declared once and reused by DictWriter below.
fieldnames = ['标题', '小区', '地名', '房子基本信息', '房价', '单价', '发布信息']
# mode='a' keeps the original append semantics; newline='' is required by the
# csv module. NOTE(review): re-running will append a second header row —
# switch to mode='w' if each run should replace the file.
with open('二手房数据.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=fieldnames)
    # Write the header exactly once per run.
    csv_writer.writeheader()
    for page in range(1, 101):
        url = f'https://tj.lianjia.com/ershoufang/pg{page}/'
        print(url)
        # Fetch and parse THIS page's listing markup (the actual fix).
        response = requests.get(url=url, headers=headers)
        selector = parsel.Selector(response.text)
        lis = selector.css('.sellListContent li')
        for li in lis:
            # Listing title; li elements without one (e.g. ad slots) are skipped.
            title = li.css('.title a::text').get()
            if not title:
                continue
            # Location links: index 0 = community, index 1 = district.
            positionInfo = li.css('.positionInfo a::text').getall()
            community = positionInfo[0]
            address = positionInfo[1]
            # Basic house description text.
            houseInfo = li.css('.houseInfo::text').get()
            # Total price, suffixed with the '万' (10k yuan) unit.
            Price = li.css('.totalPrice span::text').get() + '万'
            # Unit price with the leading '单价' label stripped.
            unitPrice = li.css('.unitPrice span::text').get().replace('单价', '')
            # Follow-count / publish-time info.
            followInfo = li.css('.followInfo::text').get()
            dit = {
                '标题': title,
                '小区': community,
                '地名': address,
                '房子基本信息': houseInfo,
                '房价': Price,
                '单价': unitPrice,
                '发布信息': followInfo,
            }
            print(dit)
            csv_writer.writerow(dit)