关于#python#的问题:通过异步爬虫抓图时,有些图片会打不开

本人刚开始接触 Python 爬虫:通过同步爬虫获取的图片都能打开,但通过异步爬虫获取的图片有些打不开。

import asyncio
import os
from urllib.parse import urljoin, urlsplit

import aiohttp
import requests
from bs4 import BeautifulSoup

# Request headers passed to every HTTP call in this script; the User-Agent
# string imitates a desktop Chrome browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
}
def geturl():
    """Collect post links from three pages of the AJAX listing endpoint.

    POSTs the ``ajax_load_posts`` form to the module-level ``url`` for
    ``paged`` values 0, 1 and 2 and gathers the ``href`` of every
    ``<a class="media-content">`` element in the returned HTML.

    Relies on the globals ``url`` and ``headers``; ``url`` is only bound
    inside the ``__main__`` guard of this file.

    Returns:
        list[str]: the collected links, in page order. Anchors without an
        ``href`` attribute are skipped.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    child_url = []
    for i in range(0, 3):
        data = {
            'append': 'list-home',
            'paged': str(i),
            'action': 'ajax_load_posts',
            'query': '',
            'page': 'home',
        }
        # requests never times out by default; bound the wait so a stalled
        # connection cannot hang the crawl forever.
        resp = requests.post(url=url, headers=headers, data=data, timeout=30)
        # Fail loudly instead of parsing an error page and returning no links.
        resp.raise_for_status()
        page = BeautifulSoup(resp.text, 'html.parser')
        for anchor in page.find_all('a', class_='media-content'):
            href = anchor.get('href')
            if href:
                child_url.append(href)
    return child_url

def getimgurl(chid_url):
    """Return the image URLs found in the gallery of one post page.

    Fetches ``chid_url`` and reads the ``src`` of every ``<img>`` inside the
    first ``<div class="nc-light-gallery">``.

    Args:
        chid_url: URL of the post page to scan.

    Returns:
        list[str]: absolute image URLs. Empty when the page has no gallery
        ``div``; ``<img>`` tags without a ``src`` are skipped.

    Raises:
        requests.HTTPError: if the page answers with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    resp = requests.get(url=chid_url, headers=headers, timeout=30)
    resp.raise_for_status()
    page = BeautifulSoup(resp.text, 'html.parser')

    gallery = page.find('div', class_='nc-light-gallery')
    if gallery is None:
        # find() returns None when the div is absent; calling find_all on it
        # would raise AttributeError.
        return []

    img_url = []
    for img in gallery.find_all('img'):
        src = img.get('src')
        if not src:
            continue
        if src.startswith('//'):
            # Protocol-relative source: keep forcing https as before.
            img_url.append('https:' + src)
        else:
            # Absolute or page-relative source: prefixing 'https:' would
            # produce a broken URL, so resolve it against the page instead.
            img_url.append(urljoin(chid_url, src))
    return img_url

#异步
# async def download(img_url):
#     img_name=img_url.split('/')[-1]
#     async with aiohttp.ClientSession() as session:
#         async with session.get(url=img_url,headers=headers) as resp:
#             with open('img/'+img_name,mode='wb') as f:
#                 f.write(await resp.content.read())
#     print(img_name,'over!')

# Synchronous version
def download(img_url):
    """Download one image into the local ``img/`` directory.

    The file name is the last path segment of ``img_url``; any query string
    or fragment is dropped so it cannot end up in the file name.

    Args:
        img_url: absolute URL of the image.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status. No
            file is written in that case, so an error page is never saved
            under an image name.
        requests.RequestException: on connection failure or timeout.
    """
    img_name = os.path.basename(urlsplit(img_url).path)
    resp = requests.get(url=img_url, headers=headers, timeout=30)
    # Without this check the body of an error response would be written to
    # disk as if it were the image, producing a file that cannot be opened.
    resp.raise_for_status()
    # open() does not create missing directories.
    os.makedirs('img', exist_ok=True)
    with open('img/' + img_name, mode='wb') as f:
        f.write(resp.content)
    print(img_name, 'over!')

#异步
# async def main():
#     tasks = []
#     for child_url in geturl():
#         for img_url in getimgurl(child_url):
#             tasks.append(asyncio.create_task(download(img_url)))
#     await asyncio.wait(tasks)

# Synchronous version
def main():
    """Download every gallery image of every post returned by geturl().

    Posts are handled one at a time: the images of one post are all
    downloaded before the next post page is fetched.
    """
    # The generator is lazy, so getimgurl() runs for a post only once the
    # previous post's images have been consumed.
    pending = (
        image_link
        for post_link in geturl()
        for image_link in getimgurl(post_link)
    )
    for image_link in pending:
        download(image_link)

if __name__ == '__main__':
    # Listing endpoint that geturl() posts to. It is bound as a module-level
    # global here, so geturl() only finds it when this file runs as a script.
    url = 'https://www.vmgirls.com/wp-admin/admin-ajax.php'
    # Disabled entry point for the asyncio variant of main():
    # loop = asyncio.get_event_loop()
    # loop.run_until_complete(main())
    main()



你好,我是有问必答小助手,非常抱歉,本次您提出的有问必答问题,技术专家团超时未为您做出解答

本次提问扣除的有问必答次数,将会以问答VIP体验卡(1次有问必答机会、商城购买实体图书享受95折优惠)的形式为您补发到账户。

因为有问必答VIP体验卡有效期仅有1天,您在需要使用的时候【私信】联系我,我会为您补发。