地址信息为:https://mall.jd.com/view_search-1056224-9061630-99-1-20-1.html
干脆别用selenium了,我找到了两个接口:
import requests
from lxml import etree
import json
# Fetch a JD shop category listing through its JSONP module endpoint, pull the
# product SKUs out of the returned HTML fragment, then ask the price endpoint
# for all of those SKUs in one request.

# A referer header is required; without it the first request returns no result.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/74.0.3729.157 Safari/537.36',
    'referer': "https://mall.jd.com/"
}
# Bound every request so a stalled connection cannot hang the script forever.
REQUEST_TIMEOUT = 10

module_response = requests.get(
    'https://module-jshop.jd.com/module/getModuleHtml.html?orderBy=99&direction=1&pageNo=1&categoryId=9061630'
    '&pageSize=20&pagePrototypeId=8&pageInstanceId=111774908&moduleInstanceId=258367169&prototypeId=55555&templateId'
    '=905542&appId=1056224&layoutInstanceId=258367169&origin=0&shopId=813953&venderId=815911&callback'
    '=jshop_module_render_callback', headers=headers, timeout=REQUEST_TIMEOUT)
module_response.raise_for_status()
info = module_response.text

# The body is JSONP: jshop_module_render_callback({...}). Cut out whatever sits
# between the first "(" and the last ")" instead of relying on a fixed offset,
# so trailing whitespace or a ";" after the call does not corrupt the JSON.
payload_start = info.find('(')
payload_end = info.rfind(')')
if payload_start == -1 or payload_end <= payload_start:
    raise ValueError('unexpected JSONP response: %r' % info[:80])
info = info[payload_start + 1:payload_end]
json_info = json.loads(info)

# Extract the product SKUs: per the original author's note, the `jdprice`
# attribute of each span.jdNum in the module HTML holds the SKU.
tree = etree.HTML(json_info['moduleText'])
skus = tree.xpath('//span[@class="jdNum"]/@jdprice')
# The price endpoint expects every SKU to be prefixed with "J_".
new_skus = ['J_' + sku for sku in skus]
# print(new_skus)

# Query the price endpoint once with all SKUs, comma-separated.
skus_string = ','.join(new_skus)
if new_skus:
    price_response = requests.get(
        f'https://f-mall.jd.com/prices/mgets?source=jshop&type=mgets&area=18_1482_48939_0&skuids={skus_string}&_=1624516262221',
        timeout=REQUEST_TIMEOUT)
    price_response.raise_for_status()
    result = price_response.json()
else:
    # Nothing was scraped, so there is nothing to price; skip the request.
    result = []
print(result)
# result: a JSON list holding the price of every requested product, e.g.
# [{'p': '69.00', 'op': '69.00', 'm': '169.00', 'cbf': 0, 'id': 'J_69200853812'}, {'p': '179.00',
# 'op': '179.00', 'm': '369.00', 'cbf': 0, 'id': 'J_66266801909'}, ... (truncated)
//*[@id="J_GoodsList"]/ul/li/div/div[3]/div[1]/div/span[2]
还是这个问题:我把你给的这个 XPath 放到代码中,结果只能取到四个
我检查了一下,这个span标签中的text以及preprice属性在最初是隐藏的,使用滚轮往下滑它才会出现
尝试用selenium控制滚轮滑动到最底部以显示所有价格:
driver.execute_script("var action=document.documentElement.scrollTop=50000")
from selenium import webdriver
import time
# Open the shop listing in Chrome, scroll to the bottom so the lazily rendered
# prices appear, then print the text of every span.jdNum element.
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get('https://mall.jd.com/view_search-1056224-9061630-99-1-20-1.html')
    # Jump far down the page; the price spans are only filled in after scrolling.
    driver.execute_script("var action=document.documentElement.scrollTop=50000")
    # Give the page a moment to render the newly revealed prices.
    time.sleep(2)
    # find_elements(By.XPATH, ...) works on both Selenium 3 and 4, whereas
    # find_elements_by_xpath was removed in Selenium 4.
    el = driver.find_elements(By.XPATH, '//span[@class="jdNum"]')
    for e in el:
        print(e.text)
finally:
    # quit() ends the whole session (browser and driver process) even when a
    # step above raised; close() would only close the current window.
    driver.quit()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Author:TaoXF
Date:23/6/2021 4:36 下午
Python Version:3.7.4
"""
import time
from selenium import webdriver
from lxml import etree
class zhongChong:
    """Selenium-driven scraper for the price list of a JD shop category page.

    The browser is started in the constructor. Call ``close()`` (or use the
    instance as a context manager) when finished so the browser is shut down.
    """

    DEFAULT_URL = "https://mall.jd.com/view_search-1056224-9061630-99-1-20-1.html"
    DEFAULT_DRIVER_PATH = "/Users/taoxiaofeng/opt/anaconda3/envs/python3.7.4/bin/chromedriver"

    def __init__(self, url=DEFAULT_URL, driver_path=DEFAULT_DRIVER_PATH):
        """Start Chrome.

        Args:
            url: Listing page to open in ``run()``.
            driver_path: Filesystem path of the chromedriver executable.
        """
        self.url = url
        self.driver_path = driver_path
        self.chrome = webdriver.Chrome(executable_path=self.driver_path)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Quit the browser and end the WebDriver session."""
        self.chrome.quit()

    def run(self):
        """Load the page, open the product list and parse it.

        Returns:
            The list of price strings produced by ``parse_list_page``.
        """
        # Imported here so the class does not depend on an extra file-level
        # import; find_element(By.XPATH, ...) works on Selenium 3 and 4.
        from selenium.webdriver.common.by import By

        self.chrome.get(self.url)  # load the page
        time.sleep(10)
        # Click through to the product list before scraping; per the original
        # author's note the page otherwise stays on its default view.
        self.chrome.find_element(
            By.XPATH, '//*[@id="258367167"]/div/div[1]/div/div/div/div[2]/div/a').click()
        time.sleep(5)
        source = self.chrome.page_source
        return self.parse_list_page(source)

    def parse_list_page(self, source):
        """Extract, print and return the price texts found in ``source``.

        Args:
            source: HTML of the listing page as a string.

        Returns:
            List of text nodes matched by the price XPath (possibly empty).
        """
        html = etree.HTML(source)
        # NOTE: the original also evaluated, but never used, these selectors:
        #   product links:  //div[@class='jDesc']//a/@href
        #   comment counts: //em[@class='jCommentNum']/text()
        # and had tried //span[@class='jdNum']/@preprice as a price source.
        price_list = html.xpath('//*[@id="J_GoodsList"]/ul/li/div/div[3]/div[1]/div/span[2]/text()')
        print(price_list)
        return price_list
# Run the scraper once, and always shut the browser down afterwards so no
# Chrome/chromedriver process is left behind, even if run() raises.
zc = zhongChong()
try:
    zc.run()
finally:
    zc.chrome.quit()
我现在要崩溃了,运行了一会又不行了@江天暮雪