I've just started learning Python web scraping. All of the images fetched by the synchronous crawler open fine, but some of the images fetched by the asynchronous crawler won't open. The script below contains both versions (the async parts are commented out):
import requests
from bs4 import BeautifulSoup
import asyncio
import aiohttp
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36'
}
def geturl():
    # Collect the detail-page links returned by the site's AJAX list endpoint.
    hreflist = []
    child_url = []
    for i in range(0, 3):
        data = {
            'append': 'list-home',
            'paged': str(i),
            'action': 'ajax_load_posts',
            'query': '',
            'page': 'home'
        }
        resp = requests.post(url=url, headers=headers, data=data)
        page = BeautifulSoup(resp.text, 'html.parser')
        hreflist.append(page.find_all('a', class_='media-content'))
    for href in hreflist:
        for child_href in href:
            child_url.append(child_href.get('href'))
    return child_url
def getimgurl(child_url):
    # Collect the image URLs found on one detail page.
    img_url = []
    resp = requests.get(url=child_url, headers=headers)
    page = BeautifulSoup(resp.text, 'html.parser')
    alist = page.find('div', class_='nc-light-gallery').find_all('img')
    for a in alist:
        img_url.append('https:' + a.get('src'))
    return img_url
# async version
# async def download(img_url):
#     img_name = img_url.split('/')[-1]
#     async with aiohttp.ClientSession() as session:
#         async with session.get(url=img_url, headers=headers) as resp:
#             with open('img/' + img_name, mode='wb') as f:
#                 f.write(await resp.content.read())
#             print(img_name, 'over!')
# sync version
def download(img_url):
    img_name = img_url.split('/')[-1]
    resp = requests.get(url=img_url, headers=headers)
    with open('img/' + img_name, mode='wb') as f:
        f.write(resp.content)
    print(img_name, 'over!')
# async version
# async def main():
#     tasks = []
#     for child_url in geturl():
#         for img_url in getimgurl(child_url):
#             tasks.append(asyncio.create_task(download(img_url)))
#     await asyncio.wait(tasks)
# sync version
def main():
    for child_url in geturl():
        for img_url in getimgurl(child_url):
            download(img_url)
if __name__ == '__main__':
    url = 'https://www.vmgirls.com/wp-admin/admin-ajax.php'
    # loop = asyncio.get_event_loop()
    # loop.run_until_complete(main())
    main()
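One possible explanation for the unreadable files: when all the image requests are fired at once, the server may answer some of them with an error or anti-crawler page, and the async download writes whatever bytes come back to disk without ever checking resp.status, so the resulting file is not a valid image. Below is a minimal sketch of the async path rewritten with one shared ClientSession, a status check, and a concurrency cap; it reuses headers, geturl() and getimgurl() from the script above, and the names download_async/main_async plus the Semaphore(5) limit are my own assumptions, not anything the site or the original code specifies.

# Sketch: async download with a status check and a concurrency cap (assumptions noted in comments).
async def download_async(session, sem, img_url):
    img_name = img_url.split('/')[-1]
    async with sem:                                  # cap how many requests run at once
        async with session.get(url=img_url, headers=headers) as resp:
            if resp.status != 200:                   # skip error/anti-crawler pages instead of saving them as images
                print(img_name, 'skipped, status', resp.status)
                return
            data = await resp.read()                 # read the full body before touching the file
    with open('img/' + img_name, mode='wb') as f:
        f.write(data)
    print(img_name, 'over!')

async def main_async():
    sem = asyncio.Semaphore(5)                       # 5 is an arbitrary assumption; tune as needed
    async with aiohttp.ClientSession() as session:   # one shared session instead of one per image
        tasks = [asyncio.create_task(download_async(session, sem, img_url))
                 for child_url in geturl()           # geturl()/getimgurl() are still blocking requests calls
                 for img_url in getimgurl(child_url)]
        await asyncio.gather(*tasks)

To try it, replace the main() call at the bottom of the script with asyncio.run(main_async()); everything else (headers, geturl, getimgurl, the url assignment) stays as it is. If files still come out corrupted, comparing a broken file's size against the same URL fetched with requests would show whether the body is truncated or simply not an image.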