某二手车数据获取问题

爬虫爬取的结果和教程不一样,好像一直在重复爬取同一页的结果。想做到和下方教程(B站的图灵教程)一样的效果。

img

img

import requests
import time
from lxml import etree
# Request headers shared by every HTTP call in this script; they present the
# client as desktop Chrome 108 on Windows.
headers = {
    # Content negotiation
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ru;q=0.7',
    # Caching / connection handling
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    # Request origin (fixed to the changsha subdomain)
    'Origin': 'https://changsha.taoche.com',
    'Pragma': 'no-cache',
    'Referer': 'https://changsha.taoche.com/',
    # Fetch metadata
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    # Browser identification
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/108.0.0.0 Safari/537.36'
    ),
    'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}


def get_city():
    """Fetch the cities listed by the taoche city-filter API.

    Sends a GET request to the ``get-filter-city-by-group`` endpoint with
    ``group=26`` and the module-level ``headers``, then flattens every
    ``cityList`` entry found under the response's ``data`` key.

    Returns:
        list[dict]: One ``{'cityName': ..., 'citySpell': ...}`` dict per city,
        in the order the API returned them. Empty when the response carries
        no ``data``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(
        'https://proconsumer.taoche.com/c-city-consumer/city/get-filter-city-by-group',
        params={'group': '26'},
        headers=headers,
        # Without a timeout requests can block forever on a stalled server.
        timeout=10,
    )
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()

    results = []
    # `or []` tolerates a null/missing "data" or "cityList" in the payload.
    for group in payload.get('data') or []:
        results.extend(
            {'cityName': city['cityName'], 'citySpell': city['citySpell']}
            for city in group.get('cityList') or []
        )
    return results

def _fetch_listings(city_spell, page):
    """Download one listing page for a city and return its ``gongge_main`` elements."""
    try:
        response = requests.get(
            f'https://{city_spell}.taoche.com/all/',
            params={'page': str(page)},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as exc:
        # One unreachable city/page should not abort the whole crawl.
        print(f'request failed for {city_spell} page {page}: {exc}')
        return []
    if not response.ok:
        print(f'unexpected status {response.status_code} for {city_spell} page {page}')
        return []

    html = etree.HTML(response.text)
    if html is None:
        # Nothing parseable came back (e.g. an empty body).
        return []
    return html.xpath('//*[@id="container_base"]/ul/li/div[@class="gongge_main"]')


def _parse_listing(div, city_name):
    """Build the result dict for a single ``gongge_main`` listing element."""
    # Every path is relative ("./") to this listing. An absolute "//..." path
    # evaluated on an element still searches the whole document, which made
    # each record contain the values of *all* listings on the page.
    title = ''.join(div.xpath('./a/span/text()')).strip()
    buy_car_time = ''.join(div.xpath('./p/i[1]/text()')).strip()
    car_km = ''.join(div.xpath('./p/i[2]/text()')).strip()
    price = (
        div.xpath('./div/i[@class="Total brand_col"]/text()')
        + div.xpath('./div/i[@class="Total brand_col"]/em/text()')
    )
    return {
        '名称': title,
        '时间': buy_car_time,
        '公里': car_km,
        '价格': ''.join(price),
        '城市': city_name,
    }


def main():
    """Crawl the first two listing pages of every city and print each record.

    For each city returned by ``get_city()``, pages 1 and 2 of
    ``https://<citySpell>.taoche.com/all/`` are fetched, every listing is
    turned into a dict (name, date, mileage, price, city) and printed.

    Returns:
        list[dict]: All parsed listings across all cities and pages.
    """
    cities = get_city()
    print(cities)

    # NOTE(review): the shared ``headers`` pin Origin/Referer to the changsha
    # subdomain while requests go to every city's subdomain -- confirm the
    # site does not treat that mismatch as bot traffic.
    results = []
    for city in cities:
        for page in range(1, 3):
            print(city, page)
            for div in _fetch_listings(city['citySpell'], page):
                result = _parse_listing(div, city['cityName'])
                print(result)
                results.append(result)
            # Throttle once per page request rather than once per listing.
            time.sleep(3)
    return results


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


可能是被反爬了,不然就是参数没设置对。代码跟视频里的一样吗?

不知道你这个问题是否已经解决, 如果还没有解决的话:

如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^