请问各位:为什么用 requests 抓取网页后,通过响应对象的 text 属性(res.text)拿到的内容只有一半?把它写入文件时,也是写到一半多就没有了。
import requests
import re
from urllib import parse
import os
class BaiduImageSpider(object):
    """Download a search-result page and extract image links from its HTML."""

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` may block indefinitely.
    REQUEST_TIMEOUT = 10

    # Captures the ``src`` value that follows a ``data-original`` attribute.
    # ``re.S`` lets ``.`` match newlines, so one match may span several lines.
    _IMG_PATTERN = re.compile(r'data-original=".*?src="(.*?)">', re.S)

    def __init__(self):
        # Search-page URL template; ``{}`` is the keyword placeholder.
        # NOTE(review): the literal '%' before '{}' produces '%%..' once a
        # percent-encoded keyword is substituted - confirm it is intended.
        self.url = 'https://www.123rf.com.cn/browse/search.php?keyword=%{}&mediaType=1'
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'}

    def get_image(self, url, word):
        """Fetch *url*, print the page and the number of image links found.

        Args:
            url: Fully built address of the page to download.
            word: Search keyword. Not used by this method; kept so existing
                callers keep working.

        Returns:
            The list of image links extracted from the page (may be empty).
        """
        res = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        res.encoding = "utf-8"
        html = res.text
        print(html)
        img_link_list = self._extract_links(html)
        print(len(img_link_list))
        return img_link_list

    @classmethod
    def _extract_links(cls, html):
        """Return every image link matched by the class pattern in *html*."""
        return cls._IMG_PATTERN.findall(html)
这个网址的搜索结果是通过 Ajax 动态加载的,用 requests 直接请求页面拿不到搜索结果,除非改用 selenium 来爬。其实直接请求它的数据接口就能获取数据:
import requests
import re
from urllib import parse
import os
import json
class BaiduImageSpider(object):
    """Query the search API endpoint and list the image links it returns."""

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` may block indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.url = 'https://api.plus.123rf.com.cn/search'
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'}

    def get_image(self, word):
        """POST a search for *word* and print each returned image link.

        Args:
            word: Search keyword sent as the ``keyword`` form field.

        Returns:
            The list of ``link_image`` values found in the response.

        Raises:
            KeyError: If the response JSON lacks ``result``/``image`` or an
                entry lacks ``link_image``.
        """
        # NOTE(review): 't', 'r' and 's' look like a timestamp, nonce and
        # signature copied from one browser request - presumably the API
        # rejects them once stale; confirm before relying on this.
        kv = {
            'page': 1,
            'size': 100,
            'mediaType': 1,
            'keyword': word,
            't': '1665144685000',
            'r': 'C7X3jzsGTaz2r3Ei',
            's': 'ba3f37e4c0caa536afe73b1caa329106',
        }
        res = requests.post(self.url, headers=self.headers, data=kv,
                            timeout=self.REQUEST_TIMEOUT)
        res.encoding = "utf-8"
        links = self._extract_links(json.loads(res.text))
        for link in links:
            print(link)
        return links

    @staticmethod
    def _extract_links(obj):
        """Return the ``link_image`` of every entry under ``result.image``."""
        return [img['link_image'] for img in obj['result']['image']]
if __name__ == "__main__":
    # Run the demo search only when executed as a script, so importing this
    # module does not trigger a network request.
    bi = BaiduImageSpider()
    bi.get_image('小猫')
可以完整爬取,代码如下:
import requests
import re
from urllib import parse
import os
class BaiduImageSpider(object):
    """Download a keyword search page and extract image links from its HTML."""

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` may block indefinitely.
    REQUEST_TIMEOUT = 10

    # Captures the ``src`` value that follows a ``data-original`` attribute.
    # The pattern is deliberately not anchored on the doctype: anchoring there
    # limits ``findall`` to one match per doctype occurrence.
    _IMG_PATTERN = re.compile(r'data-original=".*?src="(.*?)">', re.S)

    def __init__(self):
        # ``{}`` receives the percent-encoded keyword. The template carries no
        # literal '%' because the encoded keyword supplies its own.
        self.url = 'https://www.123rf.com.cn/browse/search.php?keyword={}&mediaType=1'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'}

    def get_image(self, word=''):
        """Fetch the search page for *word*; print it and the link count.

        Args:
            word: Search keyword. It is percent-encoded and substituted into
                the URL template. Defaults to an empty keyword so existing
                zero-argument calls keep working.

        Returns:
            The list of image links extracted from the page (may be empty).
        """
        res = requests.get(self._build_url(word), headers=self.headers,
                           timeout=self.REQUEST_TIMEOUT)
        res.encoding = "utf-8"
        html = res.text
        print(html)
        img_link_list = self._extract_links(html)
        print(len(img_link_list))
        return img_link_list

    def _build_url(self, word):
        """Return the search URL with *word* percent-encoded into it."""
        return self.url.format(parse.quote(word))

    @classmethod
    def _extract_links(cls, html):
        """Return every image link matched by the class pattern in *html*."""
        return cls._IMG_PATTERN.findall(html)
if __name__ == "__main__":
    # Run the demo fetch only when executed as a script, so importing this
    # module does not trigger a network request.
    bai = BaiduImageSpider()
    bai.get_image()
您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!