import scrapy
from selenium import webdriver
from time import sleep
from ceshi.items import CeshiItem
from selenium.webdriver.common.by import By

class NewsSpider(scrapy.Spider):
    name = 'news'
    start_urls = ['https://news.sina.com.cn/china/']

    def __init__(self):
        # raw string so the backslashes in the Windows path are not treated as escapes
        self.bro = webdriver.Chrome(executable_path=r'E:\learn\python\pc\ceshi\ceshi\spiders\chromedriver.exe')

    # parse the titles and links on the current page
    def parse(self, response):
        for i in range(5):
            div_list = response.xpath('//*[@id="feedCardContent"]/div[1]/div')
            for div in div_list:
                title = div.xpath('./h2/a/text()').extract_first()
                content = div.xpath('./h2/a/@href').extract_first()
                item = CeshiItem()
                item['title'] = title
                item['content'] = content
                yield item
            # turn to the next page by clicking in the Selenium browser
            part_url = '//*[@id="feedCardContent"]/div[3]/span[7]/a'
            self.bro.find_element(By.XPATH, part_url).click()
            sleep(5)

    # close the browser when the spider finishes
    def close(self, spider):
        self.bro.quit()
```
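(Aside: the spider imports `CeshiItem` and fills two fields; the project's `items.py` is not shown, so the following is only a guess at the minimum it would need to declare.)

```
import scrapy


class CeshiItem(scrapy.Item):
    # only the two fields the spider actually sets are assumed here
    title = scrapy.Field()
    content = scrapy.Field()
```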
I want it to click "next page" five times and scrape the titles and links from all five pages.
The page turning does not work, and I'm not sure whether the XPath for the next-page node is correct.
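One thing worth noting: `response` inside `parse` is the page Scrapy's own downloader fetched, so clicking in the Selenium window never changes it — even if the click succeeds, the same HTML gets parsed five times. And unless a downloader middleware loads the URL into the browser, the Selenium window is still blank when `find_element` runs. Below is a minimal sketch of one way to drive the paging entirely from the spider: load the page with `self.bro.get()`, then re-wrap `self.bro.page_source` in a `Selector` after every click. This is not necessarily the setup the original project uses — the next-page XPath is copied unverified from the question, the driver path and `executable_path` argument assume the same Selenium version as the posted code, and the click/wait counts are assumptions.

```
import scrapy
from scrapy import Selector
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep

from ceshi.items import CeshiItem


class NewsSpider(scrapy.Spider):
    name = 'news'
    start_urls = ['https://news.sina.com.cn/china/']

    def __init__(self):
        # same driver path as in the question; adjust to your environment
        self.bro = webdriver.Chrome(executable_path=r'E:\learn\python\pc\ceshi\ceshi\spiders\chromedriver.exe')

    def parse(self, response):
        # open the page in the Selenium browser so the clicks happen on a real, loaded page
        self.bro.get(response.url)
        sleep(2)

        next_xpath = '//*[@id="feedCardContent"]/div[3]/span[7]/a'  # unverified, copied from the question

        for page in range(5):
            # re-wrap the browser's *current* DOM on every iteration instead of reusing `response`
            sel = Selector(text=self.bro.page_source)
            for div in sel.xpath('//*[@id="feedCardContent"]/div[1]/div'):
                item = CeshiItem()
                item['title'] = div.xpath('./h2/a/text()').extract_first()
                item['content'] = div.xpath('./h2/a/@href').extract_first()
                yield item

            if page < 4:  # 4 clicks are enough to see 5 pages
                self.bro.find_element(By.XPATH, next_xpath).click()
                sleep(5)  # crude wait; an explicit WebDriverWait would be more robust

    def close(self, spider):
        self.bro.quit()
```

Alternatively, the click-and-wait can live in a downloader middleware that returns an `HtmlResponse` built from the browser's `page_source`, which keeps `parse` free of browser code; the all-in-`parse` version above is just the smallest change relative to the posted spider.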