PYTHON爬虫出现Traceback (most recent call last):问题

问题遇到的现象和发生背景
问题相关代码,请勿粘贴截图
运行结果及报错内容
我的解答思路和尝试过的方法
我想要达到的结果

```python
import requests, re, time
from lxml import etree
from openpyxl import Workbook

wb = Workbook()
ws = wb.active
# Header row; its column order must match the key-insertion order of `item` below.
ws.append(["年份", "排名", "电影名", "日期", "票房", "详情页url", "id", "类型", "地点","评分", "导演们", "主演们"])

# Masquerade as a desktop browser. (A mobile UA makes the site return an HTML
# error page instead of JSON, which caused the earlier JSONDecodeError.)
headers = {
     'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Connection': 'keep-alive',
    'Cookie': '_lxsdk_cuid=172574eb0c1c8-089b1c7aa92fd5-f7d1d38-144000-172574eb0c18d; _lxsdk=B55ECA50A04411EA8E3317C78304F5C137BF739A17864686BAFDCDCF192B28E8; isid=6411AA68003B448DE53F6EFCABD36751; token=azddXq07vt8BZhHqP67X5vusIloAAAAAygoAAKp_5OrZiZgDiGsXdLQ3DGZr6a3OKVmSxuPH1jFcLs709YEdYhzfq8_FU-cPpvRm9A; Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1590602756,1590602770,1591330351,1591423061; __mta=251575993.1590602756744.1590602770565.1591423063062.3; __mta=217833001.1590602674639.1591421273339.1591445015235.18; _lx_utm=utm_source%3Dgoogle%26utm_medium%3Dorganic',
    'Host': 'piaofang.maoyan.com',
    'Referer': 'https://piaofang.maoyan.com/rankings/year',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'Token': 'azddXq07vt8BZhHqP67X5vusIloAAAAAygoAAKp_5OrZiZgDiGsXdLQ3DGZr6a3OKVmSxuPH1jFcLs709YEdYhzfq8_FU-cPpvRm9A',
    'Uid': 'c9cd1cfb8f95cd68972376145781dd3937103067',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest'
}

# Fetch and parse the data.

# This is the XHR request URL (the Maoyan ranking page is loaded via ajax);
# filling the placeholders selects different years: year={}&limit=100&tab={}
temp_url = 'https://piaofang.maoyan.com/rankings/year?token=azddXq07vt8BZhHqP67X5vusIloAAAAAygoAAKp_5OrZiZgDiGsXdLQ3DGZr6a3OKVmSxuPH1jFcLs709YEdYhzfq8_FU-cPpvRm9A&year={}&limit=100&tab={}'
years = [2021, 2020, 2019, 2018, 2017]

for index, year in enumerate(years):  # tab number follows the list position
    url = temp_url.format(year, index)  # build the URL for this year
    resp = requests.get(url, headers=headers, timeout=10)
    # The endpoint sometimes answers with an HTML error page (expired token /
    # rate limiting) instead of JSON; that used to abort the whole run with
    # json.decoder.JSONDecodeError.  Skip the year instead of crashing.
    try:
        js = resp.json()
    except ValueError:
        print(f"year {year}: response is not JSON, skipping")
        continue
    content = js['yearList']  # the ranking table HTML (see F12 > Network > Preview)
    html = etree.HTML(content)  # parse the embedded HTML fragment
    uls = html.xpath("//ul")  # one <ul> element per movie row

    # Extract the fields we need from each row.
    for ul in uls:
        item = {}  # key-insertion order here defines the Excel column order
        item['year'] = year
        try:
            item['paiming'] = ul.xpath("./li[1]/text()")[0]
            item['name'] = ul.xpath("./li[2]/p[1]/text()")[0]
            item['date'] = ul.xpath("./li[2]/p[2]/text()")[0]
            item['pf'] = ul.xpath("./li[3]/text()")[0]
        except IndexError:
            continue  # malformed row, skip it

        # Each row carries the movie id, e.g.:
        # <ul class="row" data-com="hrefTo,href:'/movie/1211270'" data-loaded="true">
        h = re.sub(r"hrefTo,href:'|'", "", ul.xpath("./@data-com")[0])
        # h is now e.g. /movie/1211270
        item['href'] = 'https://piaofang.maoyan.com' + h  # detail-page URL
        item['id'] = h.split("/")[-1]

        # Scrape additional fields from the movie's detail page.
        resp1 = requests.get(item['href'], headers=headers, timeout=10)
        de_html = etree.HTML(resp1.text)

        # The detail page has two layouts: try 'detail-list' first, then fall
        # back to 'info-list', then an empty string.  (The original stacked
        # several `except Exception:` clauses after one try, so only the first
        # handler could ever run and the fallback was dead code.)
        try:
            item['type'] = de_html.xpath("//div[@class='detail-list']/div/p[1]/text()")[0].strip()
        except IndexError:
            try:
                item['type'] = de_html.xpath("//div[@class='info-list']/p[1]/text()")[0].strip()
            except IndexError:
                item['type'] = ''

        try:
            item['address'] = re.sub(r"\s|/", "",
                                     de_html.xpath("//div[@class='detail-list']/div/div[1]/div/p/text()")[0])
        except IndexError:
            item['address'] = ''

        try:
            item['rating'] = re.sub(r"\s|/", "",
                                     de_html.xpath("//div[@class='score-detail']/div[1]/span[1]/text()")[0])
        except IndexError:
            item['rating'] = ''

        # Cast and crew come from a second ajax endpoint.
        deUrl = f"https://piaofang.maoyan.com/movie/{item['id']}/moresections?token=azddXq07vt8BZhHqP67X5vusIloAAAAAygoAAKp_5OrZiZgDiGsXdLQ3DGZr6a3OKVmSxuPH1jFcLs709YEdYhzfq8_FU-cPpvRm9A"
        resp2 = requests.get(deUrl, headers=headers, timeout=10)
        # This is the call from the posted traceback: guard it the same way.
        try:
            js1 = resp2.json()
            de_html2 = etree.HTML(js1['sectionHTMLs']['celebritySection']['html'])
        except (ValueError, KeyError):
            de_html2 = None  # error page / missing section; leave crew fields empty

        # NOTE: the original wrote the failure value under the misspelled key
        # 'derectors', adding a 13th dict key and shifting the Excel columns.
        if de_html2 is not None:
            item['directors'] = ' '.join(re.sub(r"\s", "", i) for i in
                                    de_html2.xpath("//div[@class='hc-layout']/div[1]/div[2]/a//text()")).strip()
            item['actors'] = ' '.join(re.sub(r"\s", "", i).strip() for i in
                                    de_html2.xpath("//div[@class='hc-layout']/div[2]/div[2]/a//text()") if
                                    i.strip() != '').strip()
        else:
            item['directors'] = ''
            item['actors'] = ''

        print(item)
        datas = [str(i) for i in item.values()]
        ws.append(datas)
        time.sleep(2)  # throttle so the site doesn't block us mid-run
wb.save(r'数据采集.xlsx')



报错
Traceback (most recent call last):
  File "C:\Users\btod\PycharmProjects\crawl\venv\lib\site-packages\requests\models.py", line 910, in json
    return complexjson.loads(self.text, **kwargs)
  File "C:\anaconda\lib\json\__init__.py", line 348, in loads
    return _default_decoder.decode(s)
  File "C:\anaconda\lib\json\decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "C:\anaconda\lib\json\decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/btod/PycharmProjects/crawl/maoyan.py", line 99, in <module>
    js1 = resp2.json()
  File "C:\Users\btod\PycharmProjects\crawl\venv\lib\site-packages\requests\models.py", line 917, in json
    raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
requests.exceptions.JSONDecodeError: [Errno Expecting value] Not Found: 0

Process finished with exit code 1



一般这样好像需要去重新刷新猫眼界面,但经常成功运行爬取了个两百条左右后又会报这个错




import requests, re, time
from lxml import etree
import openpyxl


def run(year, tab):
    """Scrape one year of the Maoyan yearly box-office ranking.

    Appends one dict per movie to the module-level ``dataList``.

    Args:
        year: the year whose ranking to fetch.
        tab: the ``tab`` query parameter the site expects for that year.
    """
    url = "https://piaofang.maoyan.com/rankings/year?year={}&limit=100&tab={}".format(year, tab)
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Cookie': '_lxsdk_cuid=18048322de5c8-0d61a0565efa25-1734337f-1fa400-18048322de5c8; _lxsdk=18048322de5c8-0d61a0565efa25-1734337f-1fa400-18048322de5c8; theme=moviepro; _lxsdk_s=18048322de6-c1c-8e2-b99%7C%7C14',
        'DNT': '1',
        'Pragma': 'no-cache',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"'
    }

    # timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=headers, timeout=10).text
    dom = etree.HTML(response)
    ranks_list = dom.xpath('//*[@id="ranks-list"]/ul')  # one <ul> per movie row
    global dataList  # results accumulate in the module-level list
    for row in ranks_list:
        item = {}
        # Year
        item['year'] = year
        # Rank
        item['rank'] = row.xpath('./li[1]//text()')[0]
        # Movie title
        item['name'] = row.xpath('./li[2]/p[1]/text()')[0]
        # Release date, truncated to the first 10 chars (YYYY-MM-DD)
        item['date'] = str(row.xpath('./li[2]/p[2]/text()')[0])[0:10]
        # Box office
        item['pf'] = row.xpath('./li[3]/text()')[0]
        # Average ticket price
        item['avg'] = row.xpath('./li[4]/text()')[0]
        # Average attendance per screening
        item['avg_count'] = row.xpath('./li[5]/text()')[0]
        dataList.append(item)
    print(year, len(dataList), dataList)
    time.sleep(1)  # throttle between requests


def write_excel(dataList, filename):
    """Write the scraped rows to ``<filename>.xlsx``.

    Args:
        dataList: list of dicts produced by run().
        filename: output file name, without the ``.xlsx`` extension.
    """
    label_array = ["年份", "排名", "片名", "日期", "票房(万元)", "平均票价", "场均人次"]
    book = openpyxl.Workbook()
    sheet = book.active
    sheet.append(label_array)
    for item in dataList:
        # Column order must match label_array above.
        row = [item['year'], item["rank"], item["name"], item["date"],
               item["pf"], item["avg"], item["avg_count"]]
        sheet.append(row)
    # Fix: the original saved to a hard-coded name and silently ignored
    # the `filename` argument passed by the caller.
    book.save(f"{filename}.xlsx")


if __name__ == '__main__':
    dataList = []
    first_year = 2012
    tab = 12
    # Tabs 1..11 map to consecutive years starting at 2012.
    for i in range(1, tab):
        run(first_year + i - 1, i)
    write_excel(dataList, "猫眼专业版")