selenium爬虫爬取电影详情页遇到报错
import re
import time
import pymongo
from selenium import webdriver
from selenium.webdriver.common.by import By
from undetected_chromedriver import ChromeOptions
from lxml import etree
class Dyspider(object):
    """Scrape item detail pages from https://spa5.scrape.center/ with Selenium.

    Workflow: run() loads each list page, parse() extracts the detail-page
    links from it, fd() visits each detail page and extracts the fields.
    save() is a stub for MongoDB persistence.
    """

    def __init__(self):
        # Anti-bot-detection Chrome options: hide the automation switches
        # and the automation extension so the page's CDP checks pass.
        option = ChromeOptions()
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_experimental_option('useAutomationExtension', False)
        # Suppress the "save password" prompt dialog.
        prefs = {'credentials_enable_service': False, 'profile.password_manager_enabled': False}
        option.add_experimental_option('prefs', prefs)
        self.driver = webdriver.Chrome(options=option)
        # Mask navigator.webdriver before any page script runs.
        self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
            'source': 'Object.defineProperty(navigator,"webdriver",{get:() => undefined})'})
        self.url = 'https://spa5.scrape.center/'

    def run(self):
        """Iterate over all list pages and hand each page's HTML to parse().

        BUG FIX: the original clicked the "next page" button after fd() had
        already navigated the driver to the detail pages, so the button no
        longer existed on the current page and the click raised. The XPath
        itself was fine. Paginating by URL (/page/N) avoids depending on the
        driver still sitting on the list page.
        """
        for page in range(1, 504):  # 503 list pages, same count as the original loop
            self.driver.get(self.url + 'page/' + str(page))
            time.sleep(1)  # crude wait for the SPA to render; TODO: WebDriverWait
            self.parse(self.driver.page_source)

    def parse(self, scours):
        """Extract the detail-page links from one list page's HTML.

        :param scours: raw HTML of a list page (str)
        """
        html = etree.HTML(scours)
        datas = html.xpath('//div/div/div/div[2]')
        records = []  # renamed from `dict`, which shadowed the builtin
        for data in datas:
            record = {'链接': ''.join(data.xpath("./div[@class='el-col el-col-24']/a/@href"))}
            records.append(record)
        self.fd(records)

    def fd(self, dicts):
        """Visit each detail page and extract its metadata fields.

        :param dicts: list of {'链接': relative_href} dicts from parse()
        """
        for kk in dicts:
            self.driver.get('https://spa5.scrape.center' + str(kk['链接']))
            time.sleep(1)  # crude render wait; TODO: WebDriverWait
            foods = self.driver.page_source
            res = etree.HTML(foods)
            uus = res.xpath('//*[@class="info"]')
            dts = []
            for uu in uus:
                # Strip newlines/spaces that the rendered HTML embeds in the text.
                dt = {
                    '定价': ''.join(uu.xpath('./p[1]/span/text()')).replace('\n', '').replace(' ', ''),
                    '作者': ''.join(uu.xpath('./p[2]/text()')).replace('\n', '').replace(' ', ''),
                    '出版时间': ''.join(uu.xpath('./p[3]/text()')).replace('\n', '').replace(' ', ''),
                    '出版社': ''.join(uu.xpath('./p[4]/text()')).replace('\n', '').replace(' ', ''),
                    '页数': ''.join(uu.xpath('./p[5]/text()')).replace('\n', '').replace(' ', ''),
                    # NOTE(review): key looks like a typo for 'ISBN' — kept as-is
                    # in case downstream consumers already depend on it; confirm.
                    'ISBM': ''.join(uu.xpath('./p[6]/text()')).replace('\n', '').replace(' ', '')
                }
                dts.append(dt)
            print(dts)

    def save(self, dicts):
        """Persist records to MongoDB — not yet enabled (kept as a stub).

        TODO: uncomment and hoist the client/collection setup out of the loop.
        """
        # for dd in dicts:
        #     client = pymongo.MongoClient(host='localhost', port=27017)
        #     db = client['test']
        #     collection = db['电影']
        #     relust = collection.insert_one(dd)
        pass
# Script entry point: build the spider and start crawling.
if __name__ == '__main__':
    Dyspider().run()
第一页的信息爬取完毕后，点击"下一页"按钮时报错，无法跳转到下一页。
实际原因并不是下一页的 XPath 写错了，而是 fd() 方法已经把浏览器导航到了各个详情页，点击时当前页面上根本不存在该按钮。应当先回到列表页再点击，或者直接按 /page/N 的 URL 逐页访问，这样翻页就不依赖按钮了。