import urllib.request
from urllib.request import urlretrieve
from bs4 import BeautifulSoup
import re
# Captures the alt text of an <img> tag whose first attribute is alt.
findname = re.compile(r'<img alt="(.*?)"')

# Captures the src value of a self-closed <img .../> tag. The literal "/"
# before the group means the capture starts AFTER the first slash of src.
findimg = re.compile(r'<img.*src="/(.*?)"/>')
def main():
    """Scrape the ivsky picture listing page and hand the results to save()."""
    listing_url = "https://www.ivsky.com/tupian/"
    save(askdata(listing_url))
def askdata(url):
    """Collect the name and image source of every thumbnail on a listing page.

    The page is fetched with ``askurl`` and parsed with BeautifulSoup. Each
    ``<div class="il_img">`` contributes one entry built from the first
    ``<img>`` tag inside it.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        A list of ``[name, src]`` pairs of plain strings, where ``name`` is
        the image's ``alt`` text and ``src`` is its ``src`` attribute exactly
        as written in the page. Divs without an ``<img>``, or whose image
        lacks either attribute, are skipped.
    """
    html = askurl(url)
    soup = BeautifulSoup(html, "html.parser")

    datalist = []
    for item in soup.find_all('div', class_="il_img"):
        img_tag = item.find('img')
        if img_tag is None:
            continue
        # Read the attributes straight from the parsed tag. Running
        # str(re.findall(...)) over the markup stored the *repr of a list*
        # (e.g. "['name']"), which corrupted both the URL and the filename.
        name = img_tag.get('alt')
        src = img_tag.get('src')
        if not name or not src:
            continue
        # NOTE(review): save() prepends "https:" to src, so src is presumably
        # protocol-relative ("//host/path") on this site - confirm.
        datalist.append([name, src])
    return datalist
def save(data, limit=18):
    """Download the images described by *data* as ``<name>.jpg`` files.

    Args:
        data: List of ``[name, src]`` entries. ``"https:" + src`` is used as
            the download URL and ``name + ".jpg"`` as the (relative) output
            filename.
        limit: Maximum number of entries to download. Defaults to 18, the
            count that used to be hard-coded.

    Returns:
        None.
    """
    # Slice instead of indexing range(0, 18): a page that yields fewer
    # entries than the limit (or none at all) must not raise IndexError.
    for entry in data[:limit]:
        image_url = "https:" + entry[1]
        filename = entry[0] + ".jpg"
        urllib.request.urlretrieve(image_url, filename)
def askurl(url):
    """Fetch *url* and return the response body as text.

    The request is sent with a desktop-browser ``user-agent`` header.

    Args:
        url: Address to request.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If ``urlopen`` cannot complete the request.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    head = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/86.0.4240.111 Safari/537.36 Edg/86.0.622.61"
    }
    req = urllib.request.Request(headers=head, url=url)
    # The context manager closes the response even if read/decode raises,
    # so the underlying connection is not leaked.
    with urllib.request.urlopen(req) as res:
        return res.read().decode('utf-8')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
# 提示:请把 urllib.request.urlretrieve("https:"+datalist[1],datalist[0]+".jpg") 这一句中拼接出来的 URL 打印出来看看,可能是这个 URL 的拼接有问题。