import requests
from bs4 import BeautifulSoup
from requests import status_codes
# Version 1: flat script. Walk detail pages 23000-23956 on netbian.com,
# scrape every thumbnail in each page's div.pic elements, and save the
# images to a local folder.
import time

# Hoisted out of the loop: the headers never change between requests.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36'}
for page_number in range(23000, 23957):
    url = 'http://www.netbian.com/desk/{}.htm'.format(page_number)
    # timeout prevents a hung request from stalling the whole crawl
    bian_res = requests.get(url, headers=headers, timeout=10)
    bian_res.encoding = 'gbk'  # the site serves GBK-encoded pages
    soup = BeautifulSoup(bian_res.text, 'html.parser')
    for comment in soup.find_all('div', class_='pic'):
        img = comment.find('a').find('img')
        comment_name = img['alt']   # used as the output file name
        comment_url = img['src']    # thumbnail URL
        comment_res = requests.get(comment_url, timeout=10)
        # BUG FIX: the original path 'D:\\图片\cars\\' used the invalid
        # escape sequence '\c'; every backslash must be doubled.
        with open('D:\\图片\\cars\\' + comment_name + '.jpg', 'wb') as f:
            f.write(comment_res.content)
        print('({})图片已保存'.format(comment_name))
    time.sleep(1)  # be polite: pause between page requests
import requests
from bs4 import BeautifulSoup
from requests import status_codes
def geturl(url):
    """Fetch one wallpaper listing page and save every thumbnail on it.

    Args:
        url: detail-page URL, e.g. http://www.netbian.com/desk/23000.htm

    Side effects: writes one .jpg per image under D:\\图片\\cars\\ and
    prints a confirmation line for each saved file. Returns None.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36'}
    # timeout keeps one slow/hung request from blocking the crawl forever
    bian_res = requests.get(url, headers=headers, timeout=10)
    bian_res.encoding = 'gbk'  # the site serves GBK-encoded pages
    soup = BeautifulSoup(bian_res.text, 'html.parser')
    for comment in soup.find_all('div', class_='pic'):
        img = comment.find('a').find('img')
        comment_name = img['alt']   # output file name
        comment_url = img['src']    # thumbnail URL
        comment_res = requests.get(comment_url, timeout=10)
        # BUG FIX: original path 'D:\\图片\cars\\' contained the invalid
        # escape '\c'; all backslashes are now properly doubled.
        with open('D:\\图片\\cars\\' + comment_name + '.jpg', 'wb') as f:
            f.write(comment_res.content)
        print('({})图片已保存'.format(comment_name))
# Driver: crawl every detail page in the range, pausing between requests
# so the server is not flooded (~957 requests total).
import time

for page_number in range(23000, 23957):
    url = 'http://www.netbian.com/desk/{}.htm'.format(page_number)
    geturl(url)
    time.sleep(1)  # politeness delay between page fetches
使用函数目的就是能提高应用的模块性和代码的重复利用,将获取图片地址与获取并下载图片操作分别写入两个函数中,在主函数中进行调用即可。同时要注意请求页面内容时要设置一个休眠时间,不要过于频繁。修改代码如下:
import requests
from bs4 import BeautifulSoup
from requests import status_codes
import time
def geturl(url):
    """Fetch a listing page and collect its thumbnail image URLs.

    Args:
        url: detail-page URL, e.g. http://www.netbian.com/desk/23000.htm

    Returns:
        dict mapping each image's alt text (used later as the file name)
        to its thumbnail URL. NOTE: if two images share the same alt
        text, the later one silently overwrites the earlier entry.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36'}
    # timeout prevents one hung request from stalling the whole crawl
    bian_res = requests.get(url, headers=headers, timeout=10)
    bian_res.encoding = 'gbk'  # the site serves GBK-encoded pages
    soup = BeautifulSoup(bian_res.text, 'html.parser')
    imgs = {}
    for comment in soup.find_all('div', class_='pic'):
        img = comment.find('a').find('img')
        imgs[img['alt']] = img['src']
    return imgs
def get_imgs(name, url):
    """Download one image and save it as imgs/ab/<name>.jpg.

    Args:
        name: image title, used as the file name (assumed to contain no
              characters invalid in a file name — TODO confirm).
        url: direct URL of the image to download.

    Side effects: writes the file and prints a confirmation line.
    """
    import os
    comment_res = requests.get(url, timeout=10)  # timeout: avoid hanging
    # BUG FIX: the original crashed with FileNotFoundError when the
    # target directory did not exist; create it on demand.
    os.makedirs('imgs/ab', exist_ok=True)
    with open('imgs/ab/' + name + '.jpg', 'wb') as f:
        f.write(comment_res.content)
    print('({})图片已保存'.format(name))
def main():
    """Walk the sample page range, gather image URLs per page, and
    download each image, sleeping between requests to stay polite."""
    base = 'http://www.netbian.com/desk/{}.htm'
    for page_number in range(23000, 23003):
        # One listing page -> {name: image_url}
        img_dict = geturl(base.format(page_number))
        time.sleep(1)
        for name, href in img_dict.items():
            get_imgs(name, href)
            time.sleep(1)
# Entry-point guard: run the crawler only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
如有帮助,请点击我回答的采纳按钮给予采纳。