import re
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"}
url = "http://www.aiimg.com/"
res = requests.get(url=url,headers=headers)
response = res.content.decode('gb2312')
print(response)
img_url = re.findall(r'img\ssrc="(.*?)"',response,re.M)
print(img_url)
for photos in img_url:
print(photos)
photo_response = requests.get(url=photos,headers=headers).content.decode('gb2312')
for photo in photo_response:
for numbers in range(1,len(img_url)):
with open("imgs_"+str(numbers)+".jpg","wb") as fp:
fp.write(photo_response)
print("OK")
import requests
import re
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"}
url = "http://www.aiimg.com/"
res = requests.get(url=url,headers=headers)
response = res.content.decode('gb2312')
img_url = re.findall(r'<img[^>]+src=[\'"](http://[^\'"]+)[\'"]+', response, re.M)
#print('get image urls:')
print(img_url)
numbers = 1
for photos in img_url:
print(photos)
photo_response = requests.get(url=photos,headers=headers).content
with open("imgs_"+str(numbers)+".jpg","wb") as fp:
fp.write(photo_response)
numbers += 1
正则没写对,将s改成.*?即可
正则表达式的正确写法应该是,因为img和src之间并不一定是单个空白符,所以需要加上.*?,由于这里也有可能是换行符,所以最后加上re.DOTALL从而表示.(点)也可以用来表示换行符:
img_url = re.findall(r'img.*?src="(.*?)"',response, re.DOTALL)