XPath 表达式本身是正确的,在浏览器中可以匹配到所有商品;但在 PyCharm 中用 selenium 运行代码时,只能提取到前四个商品的价格。

地址信息为:https://mall.jd.com/view_search-1056224-9061630-99-1-20-1.html

干脆别用selenium了,我找到了两个接口:

import requests
from lxml import etree
import json

# The Referer header is required; without it the first request returns no result.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/74.0.3729.157 Safari/537.36',
    'referer': "https://mall.jd.com/"
}

# Seconds to wait for each HTTP call. requests applies no timeout by default,
# so a stalled connection would otherwise hang the script forever.
REQUEST_TIMEOUT = 10

# Step 1: fetch the rendered product-list module of the shop page.
response = requests.get(
    'https://module-jshop.jd.com/module/getModuleHtml.html?orderBy=99&direction=1&pageNo=1&categoryId=9061630'
    '&pageSize=20&pagePrototypeId=8&pageInstanceId=111774908&moduleInstanceId=258367169&prototypeId=55555&templateId'
    '=905542&appId=1056224&layoutInstanceId=258367169&origin=0&shopId=813953&venderId=815911&callback'
    '=jshop_module_render_callback', headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of a confusing JSON decode error below.
response.raise_for_status()
info = response.text

# The body is JSONP: jshop_module_render_callback({...}).
# Cut between the first "(" and the last ")" instead of using fixed offsets,
# so a different callback name or a trailing ";" / newline does not break parsing.
info = info[info.index('(') + 1:info.rindex(')')]

json_info = json.loads(info)

# Step 2: collect the product SKUs. The value of the "jdprice" attribute on each
# span.jdNum is what gets sent to the price endpoint as the SKU id.
tree = etree.HTML(json_info['moduleText'])
skus = tree.xpath('//span[@class="jdNum"]/@jdprice')

# The price endpoint expects every SKU to be prefixed with "J_".
new_skus = ['J_' + sku for sku in skus]

# Step 3: query the price endpoint once for all SKUs (comma-separated).
skus_string = ','.join(new_skus)
price_response = requests.get(
    f'https://f-mall.jd.com/prices/mgets?source=jshop&type=mgets&area=18_1482_48939_0&skuids={skus_string}&_=1624516262221',
    timeout=REQUEST_TIMEOUT)
price_response.raise_for_status()
result = price_response.json()
print(result)

# result: a JSON list holding the price of every requested product, e.g.
# [{'p': '69.00', 'op': '69.00', 'm': '169.00', 'cbf': 0, 'id': 'J_69200853812'}, {'p': '179.00',
# 'op': '179.00', 'm': '369.00', 'cbf': 0, 'id': 'J_66266801909'}, ... (truncated)

 

//*[@id="J_GoodsList"]/ul/li/div/div[3]/div[1]/div/span[2]

 

还是同样的问题:我把你给的写法放到代码里运行,仍然只能取到四个价格。

我检查了一下,这个span标签中的text以及preprice属性在最初是隐藏的,使用滚轮往下滑它才会出现

尝试用selenium控制滚轮滑动到最底部以显示所有价格:

driver.execute_script("var action=document.documentElement.scrollTop=50000")
from selenium import webdriver
import time

driver = webdriver.Chrome()
try:
    driver.get('https://mall.jd.com/view_search-1056224-9061630-99-1-20-1.html')
    # Per the observation in this thread, the price text only shows up after the
    # page has been scrolled, so jump to the bottom before reading the prices.
    driver.execute_script("var action=document.documentElement.scrollTop=50000")

    # Give the page a moment to fill in the prices after the scroll.
    time.sleep(2)

    # The locator strategy is passed as a plain string ("xpath" is the value of
    # By.XPATH); the find_elements_by_* helpers no longer exist in Selenium 4.
    el = driver.find_elements('xpath', '//span[@class="jdNum"]')
    for e in el:
        print(e.text)
finally:
    # quit() ends the whole browser session (close() only closes the current
    # window) and runs even when one of the steps above raises.
    driver.quit()

 

# !/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Author:TaoXF
Date:23/6/2021  4:36 下午
Python Version:3.7.4
"""
import time
from selenium import webdriver
from lxml import etree

class zhongChong:
    """Scrape the product list of one JD mall shop page through Chrome.

    ``__init__`` stores the target URL and the chromedriver path and starts
    the browser; ``run`` drives the page and hands the HTML to
    ``parse_list_page``.
    """

    def __init__(self):
        """Store the page URL and driver path, then launch Chrome."""
        self.url = "https://mall.jd.com/view_search-1056224-9061630-99-1-20-1.html"
        self.driver_path = "/Users/taoxiaofeng/opt/anaconda3/envs/python3.7.4/bin/chromedriver"
        # NOTE(review): executable_path is deprecated in Selenium 4 in favour of
        # a Service object - confirm against the installed Selenium version.
        self.chrome = webdriver.Chrome(executable_path=self.driver_path)

    def run(self):
        """Load the page, reveal all prices, parse the HTML and close the browser."""
        try:
            self.chrome.get(self.url)  # load the page
            time.sleep(10)
            # Click the "all products" link before scraping; the original note
            # says the page otherwise stays on its default view (note is cut off).
            # The locator strategy is passed as the string "xpath" (the value of
            # By.XPATH); find_element_by_xpath no longer exists in Selenium 4.
            self.chrome.find_element(
                'xpath', '//*[@id="258367167"]/div/div[1]/div/div/div/div[2]/div/a'
            ).click()
            time.sleep(5)
            # Per the observation in this thread, the price text is only filled in
            # after scrolling; without this step only the first few prices are
            # present in page_source.
            self.chrome.execute_script("var action=document.documentElement.scrollTop=50000")
            time.sleep(2)
            source = self.chrome.page_source
        finally:
            # Always end the browser session, even if a step above raised.
            self.chrome.quit()
        self.parse_list_page(source)

    def parse_list_page(self, source):
        """Extract product data from the list-page HTML and print the prices.

        Args:
            source: HTML text of the product list page.

        Returns:
            The list of price strings matched by the price XPath (also printed).
        """
        html = etree.HTML(source)
        # NOTE(review): shop_urls and counts are extracted but not used yet.
        shop_urls = html.xpath("//div[@class='jDesc']//a/@href")
        counts = html.xpath("//em[@class='jCommentNum']/text()")
        price_list = html.xpath('//*[@id="J_GoodsList"]/ul/li/div/div[3]/div[1]/div/span[2]/text()')
        print(price_list)
        return price_list

if __name__ == "__main__":
    # Start the scraper only when this file is executed as a script, so that
    # importing the module does not launch a browser as a side effect.
    zc = zhongChong()
    zc.run()

 

我现在要崩溃了,运行了一会又不行了@江天暮雪