requests.text爬出来的网页不全

请问大家，为什么用 requests 获取的 res.text 只有网页的一半？把结果写入文件，也是写到一半多就没了。

import requests
import re
from urllib import parse
import os
class BaiduImageSpider(object):
    """Download a 123rf search page and count the image links in its HTML."""

    # Captures the ``src`` value that follows each ``data-original`` attribute.
    # re.S lets ``.`` match newlines, so the two attributes may sit on
    # different lines. Compiled once here instead of on every call.
    _IMG_LINK_PATTERN = re.compile(r'.*?data-original=".*?src="(.*?)">', re.S)

    def __init__(self):
        # Search-page URL template with a ``{}`` slot. get_image() does not
        # read it; the caller passes a ready-made URL instead.
        # NOTE(review): the literal '%' before '{}' looks unintended if the
        # keyword is percent-encoded before formatting - confirm.
        self.url = 'https://www.123rf.com.cn/browse/search.php?keyword=%{}&mediaType=1'
        # Browser-like User-Agent sent with every request.
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'}

    def get_image(self, url, word, timeout=10):
        """Fetch *url*, print the page text and the number of image links.

        Args:
            url: Complete URL of the search page to request.
            word: Search keyword. Not used by this method; kept so that
                existing callers keep working.
            timeout: Seconds to wait for the server, forwarded to
                ``requests.get``. requests applies no timeout by default,
                so a stalled connection would otherwise never return.

        Returns:
            The list of image URLs matched in the page (possibly empty).
        """
        res = requests.get(url, headers=self.headers, timeout=timeout)
        # Force UTF-8 decoding rather than relying on the guessed encoding.
        res.encoding = "utf-8"
        html = res.text
        print(html)
        img_link_list = self._extract_image_links(html)
        print(len(img_link_list))
        return img_link_list

    @staticmethod
    def _extract_image_links(html):
        """Return every image URL the link pattern captures in *html*."""
        return BaiduImageSpider._IMG_LINK_PATTERN.findall(html)

这个网址的搜索结果是通过 AJAX 动态加载的，用 requests 直接请求页面获取不到结果，除非用 selenium 来爬。其实直接请求数据接口获取数据就行：

import requests
import re
from urllib import parse
import os
import json
class BaiduImageSpider(object):
    """Query the 123rf search API and print the image links it returns."""

    def __init__(self):
        # JSON search endpoint, requested directly instead of the HTML page.
        self.url = 'https://api.plus.123rf.com.cn/search'
        # Browser-like User-Agent sent with every request.
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'}

    def get_image(self, word, timeout=10):
        """POST a search for *word* and print each image link in the reply.

        Args:
            word: Search keyword, sent as the ``keyword`` form field.
            timeout: Seconds to wait for the server, forwarded to
                ``requests.post``. requests applies no timeout by default,
                so a stalled connection would otherwise never return.

        Returns:
            The list of ``link_image`` values from the response.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            KeyError: If the JSON body lacks ``result``/``image``/``link_image``.
        """
        # NOTE(review): 't', 'r' and 's' look like a captured timestamp, nonce
        # and signature. Presumably they expire or are tied to the other
        # fields - confirm before changing 'page' or 'size'.
        kv = {'page': 1,'size': 100,'mediaType': 1,'keyword': word,'t': '1665144685000','r': 'C7X3jzsGTaz2r3Ei','s': 'ba3f37e4c0caa536afe73b1caa329106'}
        res = requests.post(self.url, headers=self.headers, data=kv, timeout=timeout)
        # Fail with a clear HTTP error instead of a confusing JSON/Key error
        # when the API rejects the request.
        res.raise_for_status()
        res.encoding = "utf-8"
        obj = json.loads(res.text)
        links = self._links_from_payload(obj)
        for link in links:
            print(link)
        return links

    @staticmethod
    def _links_from_payload(payload):
        """Return the ``link_image`` value of every item in ``payload['result']['image']``."""
        return [img['link_image'] for img in payload['result']['image']]
 
# Example run: search the API for '小猫' (kitten).
bi = BaiduImageSpider()
bi.get_image('小猫')

我这边测试，可以爬到完整的网页：

img

import requests
import re
from urllib import parse
import os


class BaiduImageSpider(object):
    """Download the 123rf search page and count the image links in its HTML."""

    # Captures the ``src`` value that follows each ``data-original`` attribute.
    # The pattern deliberately has no '<!doctype html>' prefix: anchoring every
    # match to the doctype limits findall() to one hit per doctype occurrence.
    # re.S lets ``.`` match newlines.
    _IMG_LINK_PATTERN = re.compile(r'data-original=".*?src="(.*?)">', re.S)

    def __init__(self):
        # NOTE(review): get_image() requests this template as written; the
        # '{}' keyword slot is never filled in - confirm the intended keyword.
        self.url = 'https://www.123rf.com.cn/browse/search.php?keyword=%{}&mediaType=1'
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'}

    def get_image(self, timeout=10):
        """Fetch ``self.url``, print the page text and the number of image links.

        Args:
            timeout: Seconds to wait for the server, forwarded to
                ``requests.get``. requests applies no timeout by default,
                so a stalled connection would otherwise never return.

        Returns:
            The list of image URLs matched in the page (possibly empty).
        """
        res = requests.get(self.url, headers=self.headers, timeout=timeout)
        # Force UTF-8 decoding rather than relying on the guessed encoding.
        res.encoding = "utf-8"
        html = res.text
        print(html)
        img_link_list = self._extract_image_links(html)
        print(len(img_link_list))
        return img_link_list

    @staticmethod
    def _extract_image_links(html):
        """Return every image URL the link pattern captures in *html*."""
        return BaiduImageSpider._IMG_LINK_PATTERN.findall(html)


# Example run: fetch the search page and print its HTML and link count.
bai = BaiduImageSpider()
bai.get_image()


您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!
PS:问答VIP年卡 【限时加赠:IT技术图书免费领】,了解详情>>> https://vip.csdn.net/askvip?utm_source=1146287632