Python scraper can't get the full product list from Walmart search pages

Page: https://www.walmart.com/search/?page=2&ps=40&query=pillowcase


When scraping Walmart's product search results, I only ever get the first 10 products of each page (each page has 40 products).
The page doesn't look like a dynamically loaded page either.
But when I open it in a browser I notice a pattern: it first loads 10 products, and then after a second or two the remaining products appear.

I'd really appreciate some pointers. This has been giving me a headache for a long time, and I couldn't find anything about it online.

My code:

import urllib.request
import random
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import time as t
import numpy
import datetime

# A list of User-Agent strings to pick from
my_headers = [
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)"
]
# Build the list of search-result URLs to crawl
keyword='Networking+Cables'
def geturl():
    url='https://www.walmart.com/search/?grid=true&'
    urls=[]
    for i in range(1):
        if(i==0):
            urls.append(url+"query="+keyword)
        else:
            urls.append(url+"page="+str(i+1)+"&ps=40&"+"query="+keyword)  # assemble the URL for later pages
    return urls
# Get the list of page URLs
urls=geturl()
print(urls)
links=[]
wmid=[]
time=[]
locate=[]
ranking=[]
seller=[]
price=[]
price_min=[]
price_max=[]
reviews=[]
star=[]
prodflag=[]
title=[]

# Pick a random User-Agent
random_header = random.choice(my_headers)
a=0
n=1
rank=1

for i in urls:
    print("Parsing page " + str(n) + "......")
    t.sleep(numpy.random.uniform(0, 4))
    # Build the request with urllib
    req = urllib.request.Request(i)
    # Add the User-Agent header
    req.add_header("User-Agent", random_header)
    # Fetch the HTML returned by the server
    response = urllib.request.urlopen(req)
    html = response.read().decode('utf-8')
    # Parse the page with BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')

    for li in soup.find('div',id="searchProductResult").find_all('li'):
        a+=1 
        
        try:
            reviews.append(li.find('span',class_="stars-reviews-count").span.string)
        except:
            reviews.append(0)
       
        link=("https://www.walmart.com"+li.find('a',class_="product-title-link line-clamp line-clamp-2 truncate-title").attrs['href'])        
        if(len(link)>30):
            link_new=link.split("?")[0]
        links.append(link_new) 
        
        time.append(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        
        title.append(link.split('/')[4].replace('-',' '))
        
        wmid.append(link_new.split('/')[-1])
       
        try:
            prodflag.append(li.find('span',class_="flag-angle__content").string)
        except:
            prodflag.append("None")
        
        locate.append(str(n)+"-"+str(a))
       
        ranking.append(rank)
        rank+=1
            
        seller.append(li.find('div',class_="search-result-product-shipping-details gridview").get_text())
        
        try:
            price_block = li.find('span',class_="price-main-block").find_all('span',class_="price price-main")
            min_ = price_block[0].span.string
            max_ = price_block[1].span.string
            price.append(min_+"-"+max_)
            price_min.append(min_)
            price_max.append(max_)
        except:
            price_ = li.find('span',class_="price display-inline-block arrange-fit price price-main").span.string
            price.append(price_)
            price_min.append(price_)
            price_max.append(price_)
        
        star.append(li.find('span',class_='visuallyhidden seo-avg-rating').string)
        
    print("第"+str(n)+"个页面解析成功!")
    n+=1
    a=0

table=pd.DataFrame()
table['Link']=links
table['Time']=time
table['WMID']=wmid
table['Locate']=locate
table['Ranking']=ranking
table['Seller']=seller
table['Price']=price
table['Price_min']=price_min
table['Price_max']=price_max
table['Reviews']=reviews
table['Star']=star
table['prodFlag']=prodflag
table['Title']=title

table.to_excel('walmart_spider/'+keyword+'_rank_info.xlsx',sheet_name=keyword,index=False)

print(table)

 

Since the full content only appears after a few seconds, why not wait a few seconds before you start scraping?
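
For example, here is a minimal sketch of that idea using the Selenium webdriver your code already imports. It assumes chromedriver is installed, and it reuses the searchProductResult container from your code; Walmart's markup may have changed, so treat the selector and the 10-item threshold as assumptions:

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

url = "https://www.walmart.com/search/?page=2&ps=40&query=pillowcase"

driver = webdriver.Chrome()   # assumes chromedriver is on your PATH
driver.get(url)

# Wait up to 15 seconds until the lazy-loaded items have been appended,
# i.e. until #searchProductResult holds more than the initial 10 <li> elements.
WebDriverWait(driver, 15).until(
    lambda d: len(d.find_elements(By.CSS_SELECTOR, "#searchProductResult li")) > 10
)

soup = BeautifulSoup(driver.page_source, 'html.parser')
items = soup.find('div', id="searchProductResult").find_all('li')
print(len(items))   # should now be close to 40
driver.quit()

If the remaining items are added by JavaScript (which the staggered loading you describe suggests), then sleeping before the urlopen call won't change what the server returns; the waiting has to happen inside a real browser session like the Selenium one above, and only then do you hand page_source to BeautifulSoup.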