I've tried this many times: it gets through a bit more than 2 pages, then prints the message from the except block and stops before finishing the pages I asked for. No actual error seems to be raised, so I have no idea how to debug it. (There are a lot of files to download; do I need to make this multithreaded? I tried methods I found on Baidu with no luck.) I also need to add a piece of info from the detail page, plus the page URL, into the image file's properties, and I have no clue where to start. I'm a complete amateur learning bit by bit from Baidu searches; please don't look down on me, any pointers are appreciated.
import os
import json
import re
import time
import traceback

import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}
# Download the full image set from a gallery-thumbnail page
def getPic(url):
    print("download pic url +===" + url)
    result = requests.get(url, headers=headers, timeout=30)
    result.encoding = 'utf-8'
    soup = BeautifulSoup(result.content, 'lxml')
    # The image list is stored as JSON in the div's data-images attribute
    json_data = soup.find('div', attrs={'id': 'gallery-items'})
    name = soup.find('a').string
    # Strip characters that are illegal in Windows file names
    simpleName = re.sub(r'[/:*?"<>|\\]+', '-', name)
    print(simpleName)
    path = 'f:/CodeWar/spider/Archdaily/'
    newPath = os.path.join(path, simpleName)
    os.makedirs(newPath, exist_ok=True)
    figures = json.loads(json_data.get('data-images'))
    i = 1
    for figure in figures:
        try:
            print('downloading number:' + str(i) + "====>>" + figure['url_large'])
            image = requests.get(url=figure['url_large'], headers=headers, timeout=30)
            if image.status_code == 200:
                # Write with the full path instead of os.chdir(), which
                # silently changes the working directory for the whole program
                with open(os.path.join(newPath, str(i) + '.jpg'), 'wb') as f:
                    f.write(image.content)
            i += 1
        except Exception:
            # A bare except: hides the real reason the crawl stops;
            # print the traceback so it can actually be debugged
            traceback.print_exc()
            time.sleep(5)
            continue
# Collect the project links from one listing page and crawl each one
def get_url(page):
    # Use the parameter, not the global sourceWeb, so the function
    # actually fetches the page it was given
    pageResult = requests.get(page, headers=headers, timeout=30)
    pageSoup = BeautifulSoup(pageResult.content, 'lxml')
    for collection in pageSoup.find_all('a', class_='afd-title--black-link'):
        if 'href' in collection.attrs:
            sonLink = 'https://www.archdaily.com' + collection.attrs['href']
            sonResponde = requests.get(sonLink, headers=headers, timeout=30)
            sonResponde.encoding = 'utf-8'
            sonSoup = BeautifulSoup(sonResponde.content, 'lxml')
            thumb = sonSoup.find('a', class_='gallery-thumbs-link')
            if thumb:
                thumbLink = 'https://www.archdaily.com' + thumb.attrs['href']
                try:
                    getPic(thumbLink)
                except Exception:
                    traceback.print_exc()
                    time.sleep(5)
                    continue
    print('--------- create next folder ---------')
motherWeb = 'https://www.archdaily.com/page/'
# Number of listing pages to crawl; the old "n = 0; while n <= wanna_page"
# loop pre-incremented n and so fetched one page too many
wanna_page = 10
for n in range(1, wanna_page + 1):
    sourceWeb = motherWeb + str(n)
    try:
        get_url(sourceWeb)
        print('this is page ' + str(n))
    except Exception:
        # Show the real error before sleeping; "connection refused" was
        # only a guess, and the bare except caught everything else too
        traceback.print_exc()
        print("Let me sleep for 5 seconds")
        time.sleep(5)
        print("Was a nice sleep, now let me continue...")
        continue
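
On the multithreading part of the question: the per-image downloads inside getPic are independent of one another, so a thread pool is the usual fit. Below is a minimal sketch using concurrent.futures that drops into the script above (it reuses requests, os, traceback, and headers from there); download_one, download_all, and the worker count are illustrative names, not from the original code. Keep workers small: a site may throttle bursts of requests, and being throttled mid-crawl would look exactly like the "stops after 2 pages" symptom, so the traceback printing above matters more than the threading.

from concurrent.futures import ThreadPoolExecutor, as_completed

def download_one(index, figure, folder):
    # Fetch one image and save it under its sequence number
    image = requests.get(figure['url_large'], headers=headers, timeout=30)
    if image.status_code == 200:
        with open(os.path.join(folder, str(index) + '.jpg'), 'wb') as f:
            f.write(image.content)
    return index

def download_all(figures, folder, workers=4):
    # Submit every image as a task; as_completed yields each one as it finishes
    with ThreadPoolExecutor(max_workers=workers) as pool:
        tasks = [pool.submit(download_one, i, fig, folder)
                 for i, fig in enumerate(figures, start=1)]
        for task in as_completed(tasks):
            try:
                print('finished number: ' + str(task.result()))
            except Exception:
                traceback.print_exc()

With this in place, the whole "for figure in figures" loop in getPic becomes a single call: download_all(figures, newPath).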
You mean the Details tab of the file's Properties, right? That is the image's EXIF data; you can edit it with piexif. As a demo, this puts your user-agent string into the Artist field:
from PIL import Image
import piexif

im = Image.open('4.jpg')
# Freshly downloaded JPEGs may have no EXIF block at all, so fall back
# to an empty structure instead of crashing on a missing "exif" key
if "exif" in im.info:
    exif_dict = piexif.load(im.info["exif"])
else:
    exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
# Artist shows up as "Authors" in the Windows Details tab
exif_dict["0th"][piexif.ImageIFD.Artist] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36".encode()
exif_bytes = piexif.dump(exif_dict)
im.save("4.jpg", exif=exif_bytes)
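
For what you actually asked (a snippet from the detail page plus the page URL), the same pattern works with different fields. A minimal sketch, assuming you pass both strings in from the crawler; tag_image is a made-up helper name, and ImageDescription/XPComment are one reasonable field choice, not the only one. The XP* fields are what the Windows Details tab shows as Title/Comments, and they must be UTF-16LE encoded.

from PIL import Image
import piexif

def tag_image(filename, description, page_url):
    im = Image.open(filename)
    # Same fallback as above for images with no EXIF block
    if "exif" in im.info:
        exif_dict = piexif.load(im.info["exif"])
    else:
        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
    # ImageDescription holds the text snippet from the detail page
    exif_dict["0th"][piexif.ImageIFD.ImageDescription] = description.encode('utf-8')
    # XPComment appears as "Comments" in the Details tab; XP* fields are UTF-16LE
    exif_dict["0th"][piexif.ImageIFD.XPComment] = page_url.encode('utf-16le')
    piexif.insert(piexif.dump(exif_dict), filename)

tag_image('4.jpg', 'text scraped from the detail page', 'https://www.archdaily.com/...')

Note that piexif.insert() rewrites only the EXIF segment of the file, so unlike im.save() it does not re-encode the image data.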