请帮忙调试一下这段代码:报错是与所用的库有关,还是与数据类型有关?

没看明白报错具体说了什么;报错信息里出现了库文件,应该是库的使用方式出了问题,而不是网站的问题。

img

img

import self as self
from gevent import monkey

# 猴子补丁

monkey.patch_all()

from gevent.pool import Pool

from queue import Queue

import requests

import json

from lxml import etree


class RedBookSpider():


    """小红书爬虫"""


    def __init__(self, pages):
        """Store the crawl settings and create the shared work structures.

        Args:
            pages: Page bound for the crawl; kept on the instance as
                ``self.pages``.
        """
        self.pages = pages

        # Listing endpoint; the ``{}`` placeholder receives the page number.
        self.url = 'https://www.xiaohongshu.com/web_api/sns/v2/trending/page/brand?page={}&page_size=20'

        # Headers sent with requests: a mobile Chrome-on-Android User-Agent.
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/74.0.3729.131 Mobile Safari/537.36"
            ),
        }

        # Queue of listing URLs and a gevent pool capped at five greenlets.
        self.url_queue = Queue()
        self.pool = Pool(5)


    def get_url(self):


        """获取url"""

        for page in range(1, self.pages):

            url = self.url.format(page)

            self.url_queue.put(url)


    def save_data(self, items):
        """Append the string form of ``items`` as one line of ``./redbook.txt``.

        Args:
            items: The scraped record; it is written via its ``str()`` form.
        """
        with open('./redbook.txt', 'a', encoding='utf-8') as out_file:
            # print() writes str(items) followed by a single newline.
            print(items, file=out_file)


    def deal_detail(self, detail_url, items, data, timeout=10):
        """Fetch a brand detail page, fill in ``items`` and save the record.

        Args:
            detail_url: URL of the brand detail page to download.
            items: Dict for this brand; it is updated in place with the keys
                ``fans``, ``articles``, ``introduce``, ``detail_url`` and
                ``image``.
            data: The listing entry for this brand; its
                ``['page_info']['banner']`` value becomes ``items['image']``.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout, a stalled connection would block this worker
                indefinitely.

        Raises:
            requests.exceptions.RequestException: If the download fails or
                times out.
            KeyError: If ``data`` has no ``page_info``/``banner`` entry.
        """
        resp = requests.get(url=detail_url, headers=self.headers, timeout=timeout)

        eroot = etree.HTML(resp.text)

        # Each XPath result is stored exactly as lxml returns it.
        items['fans'] = eroot.xpath('//div[@data-v-64bff0ce]/div[@class="extra"]/text()')
        items['articles'] = eroot.xpath('//div/span[@class="stats"]/text()')
        items['introduce'] = eroot.xpath('//div[@class="desc"]/div[@class="content"]/text()')
        items['detail_url'] = detail_url
        items['image'] = data['page_info']['banner']

        print(items)

        self.save_data(items)


    def deal_response(self, resp):


        """数据提取"""

        dict_data = json.loads(resp.text)

        dict_data = dict_data['data']

        for data in dict_data:

            items = {}

            items['name'] = data['page_info']['name']

            detail_url = 'https://www.xiaohongshu.com/page/brands/' + data['page_id']

            self.deal_detail(detail_url, items, data)


    def execute_task(self):


     

img

这个错误是

    dict_data = json.loads(resp.text)

中 resp.text 的内容不是标准的 JSON 数据,json.loads() 无法解析。
你可以先把 resp.text 打印出来,看看它是不是标准的 JSON 数据。