爬虫网站图片
代码如下:
import requests
import parsel
url = 'http://www.199it.com/archives/category/199itdata'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0'}
res = requests.get(url=url,headers=header)
url_text = res.text
# print(url_text)打印网页源代码文本,查看运行情况
a = parsel.Selector(url_text)
lis = a.xpath('//div[@id="content"]//article')
# print(lis)
for i in lis:
title = i.xpath('.//h2/a/text()').get()
img = i.xpath('.//a/img').get()
# print(title,img)
img_text = requests.get(url=img,headers=header).text
# print(img_text)
data = parsel.Selector(img_text)
img_url = data.xpath('//div[@id="content"]//article//a/img/@src').getall()
# print(img_url)
for l in img_url:
pic = requests.get(url=l, headers=header).content
pic_name = l.split('/')[-1]
# print(pic_name)
with open('img//'+ pic_name,mode='wb')as f:
f.write(pic)
print('保存完成',pic_name)
运行出现如下错误
但是将代码15和21行的url修改后可以爬虫到图片,同时图片损坏无法显示
【相关推荐】
#通过接口进行调用
@app.route('/<path:path>')
def show_predict(path):
X_predict = []
X_predict.append(path)
model, vector = loadModel()
x = vector.transform(X_predict)
y_predict = model.predict(x)
print(y_predict[0])
return "url predict: "+str(y_predict[0])