刚学Python,爬网站图片的时候代码报“label empty or too long”错误

下面是代码,刚开始的时候能爬一点图片,过几秒就报错,不知道是怎么回事,求大佬指点!

import requests
import re
# Crawl listing pages 0-199, follow every link captured from each listing page,
# and save every image captured from the linked pages into the IMG\ directory.
for i in range(0,200):
    base_url = ("https://www.buxiuse.com/?page={}".format(i))
    # NOTE(review): the dict key is the literal string 'headers', so this sends an
    # HTTP header named "headers"; the browser identity string would have to be
    # keyed as 'User-Agent' to be sent as the User-Agent header.
    headers = {
        'headers':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36'
    }
    response = requests.get(base_url,headers=headers)
    # Capture the href of each <a> nested inside a <div>/<span> pair on the listing page.
    page_urls = re.findall('<div class=".*?"> <span class=".*?"> <a href="(.*?)" class=".*?">',response.text)
    for urls in page_urls:
        # NOTE(review): the captured href is requested as-is, without validation and
        # without any try/except. Presumably the reported "label empty or too long"
        # error is raised here (or at the image request below) when a captured value
        # is not a well-formed absolute URL -- confirm by printing the value first.
        response = requests.get(urls,headers=headers)
        # Capture the src of every <img> that carries width and referrerpolicy attributes.
        jpg_urls = re.findall('<img src="(.*?)" width=".*?" referrerpolicy=".*?">',response.text)
        for jpg in jpg_urls:
            # The file name is the last path segment of the image URL.
            jpg_name = jpg.split('/')[-1]
            response = requests.get(jpg,headers=headers)
            # NOTE(review): nothing in this script creates the IMG directory, and the
            # backslash separator is Windows-specific -- open() fails if IMG is missing.
            with open('IMG\\' + jpg_name,'wb') as f:
                f.write(response.content)
                print("下载完成=======>",jpg_name)

可以用下面的代码试试。不过涉及这类 I/O 操作时,建议还是用多线程。

# -*- coding:utf-8 -*-
import requests
import re
import os
import time
from urllib.request import urlretrieve

# Download every thumbnail found on listing pages 0-199 into ./image, one at a time.

# The browser identity must be sent under the 'User-Agent' header name; a key
# called 'headers' is transmitted as an unrelated header the server ignores.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36'
}
# The pattern is the same for every page, so compile it once outside the loop.
regx = re.compile(r'<img class="height_min" title=".*?" alt=".*?".*?src="(.*?)"')
# exist_ok makes this a no-op when the directory is already there.
os.makedirs('image', exist_ok=True)

for i in range(0,200):
    base_url = "https://www.buxiuse.com/?page={}".format(i)
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(base_url, headers=headers, timeout=10)
    except requests.RequestException as err:
        print("skip page", base_url, err)
        continue
    image_urls = regx.findall(response.text)

    for item in image_urls:
        # Last path segment of the URL is used as the local file name.
        name = item.rsplit('/', 1)[-1]
        # Small pause between downloads to avoid hammering the server.
        time.sleep(0.5)
        try:
            urlretrieve(item, "image/%s" % name)
        except (OSError, ValueError) as err:
            # OSError covers network/file errors (URLError is a subclass);
            # ValueError covers malformed URLs, including the UnicodeError
            # "label empty or too long" raised for an invalid host name.
            print("skip image", item, err)

多线程代码(利用生产者和消费者模式实现):

# -*- coding:utf-8 -*-
import requests
import re
import os
import time
from urllib.request import urlretrieve
from queue import Queue
import threading



def Productor(q, urls, start=0, stop=200, step=1):
    """Fetch listing pages and put every image URL found on them into ``q``.

    Args:
        q: Queue shared with the consumers; receives one image URL per item.
        urls: Shared list; the URL of each page this producer handles is
            appended to it.
        start: First page number to crawl.
        stop: Page number to stop before (exclusive).
        step: Stride between page numbers. Running N producers with
            ``start=k, step=N`` for k in 0..N-1 splits the pages between them
            so that no page is crawled twice.
    """
    # The browser identity must be keyed as 'User-Agent'; a key named
    # 'headers' is sent as an unrelated header.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36'
    }
    # Same pattern for every page, so compile it once.
    regx = re.compile(r'<img class="height_min" title=".*?" alt=".*?".*?src="(.*?)"')
    for i in range(start, stop, step):
        base_url = "https://www.buxiuse.com/?page={}".format(i)
        urls.append(base_url)
        try:
            # A timeout keeps one stalled connection from blocking this producer.
            response = requests.get(base_url, headers=headers, timeout=10)
        except requests.RequestException as err:
            print("skip page", base_url, err)
            continue
        for item in regx.findall(response.text):
            q.put(item)


def Consumer(q, urls):
    """Download image URLs taken from ``q`` into ./image until a ``None`` arrives.

    Args:
        q: Queue of image URLs. A ``None`` item is the shutdown signal; the
            consumer returns as soon as it receives one.
        urls: Unused; kept so the call signature stays the same.
    """
    # exist_ok avoids the exists()/mkdir() race between concurrent consumers.
    os.makedirs('image', exist_ok=True)
    while True:
        image_url = q.get()
        if image_url is None:
            # Shutdown signal: every producer has finished.
            break
        if not image_url:
            continue
        name = image_url.rsplit('/', 1)[-1]
        # Small pause between downloads to avoid hammering the server.
        time.sleep(0.2)
        try:
            urlretrieve(image_url, "image/%s" % name)
        except (OSError, ValueError) as err:
            # A failed download must not kill the worker. OSError covers
            # network/file errors; ValueError covers malformed URLs, including
            # the UnicodeError "label empty or too long".
            print("skip image", image_url, err)


if __name__ == '__main__':
    q = Queue(maxsize=1000)
    urls = []
    producer_count = 4
    consumer_count = 5

    # Each producer takes every producer_count-th page, so the 200 pages are
    # split between them instead of each thread crawling all 200.
    producers = [
        threading.Thread(
            target=Productor,
            args=(q, urls),
            kwargs={'start': n, 'step': producer_count},
        )
        for n in range(producer_count)
    ]
    consumers = [
        threading.Thread(target=Consumer, args=(q, urls))
        for _ in range(consumer_count)
    ]
    for worker in producers + consumers:
        worker.start()

    # Once every producer is done, send one shutdown signal per consumer so
    # each of them drains the remaining work and then returns.
    for p in producers:
        p.join()
    for _ in consumers:
        q.put(None)
    for c in consumers:
        c.join()
    print('爬取完成')