”结合第三方库Beautiful Soup和requests,将网页(https://m.huiyi8.com/fengjing/zuimei/)中的图片分别爬取保存至本地文件目录“image1/“中。感觉挺简单的,我也按照示例代码改了一下,但是爬到的图片都是这样空白的,不知道为什么。。
import requests
from bs4 import BeautifulSoup
#url="https://sc.chinaz.com/tupian/"
url="https://m.huiyi8.com/fengjing/zuimei/"
headers={
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) \
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
resp=requests.get(url=url,headers=headers)
resp.encoding="utf-8"
resp_text=resp.text
'''2.从网页源代码获取子标签里的链接'''
soup=BeautifulSoup(resp_text,'html.parser')
img_tags=soup.find_all("img")
'''3.爬取链接下的图片,并写入文件'''
for ipic in img_tags:
href=ipic.get('src')
img_resp=requests.get(href)
tupian=img_resp.content
tupian_name=href.split('/')[-1]
with open("image2/"+tupian_name,mode='wb')as f:
f.write(tupian)
f.close()
print("下载成功")
resp.close()
`

我结合你的代码,进行了一些修改:
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import urllib3
urllib3.disable_warnings()
# url="https://sc.chinaz.com/tupian/"
url = "https://m.huiyi8.com/fengjing/zuimei/"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) \
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
resp = requests.get(url=url, headers=headers, verify=False)
resp.encoding = "utf-8"
resp_text = resp.text
'''2.从网页源代码获取子标签里的链接'''
soup = BeautifulSoup(resp_text, 'html.parser')
img_tags = soup.find_all("img")
for ipic in img_tags:
href = ipic.get('src')
img_resp = requests.get(href, headers=headers, verify=False)
tupian = img_resp.content
tupian_name = href.split('/')[-1]
with open("image2/" + tupian_name, mode='wb') as f:
f.write(tupian)
print("下载成功")
resp.close()
有些注意点:
图片后缀啥的都正常不