一部分代码, 加了while循环后,不应该遍历三次吗,翻页的话不应该有24个url吗
bro = webdriver.Chrome(executable_path=r'C:\Users\wanghao\AppData\Local\Programs\Python\Python38\chromedriver.exe')
page = 1
def parse(self, response):
self.bro.get(response.url)
time.sleep(2)
while self.page < 3:
listmain = response.xpath('//*[@id="reportFirst1"]/div[2]/div')[0:8]
for each in listmain:
item = {}
url = ''.join(each.xpath('./div/a/@href').extract())
urls = 'https:' + url
video_name = ' '.join(each.xpath('./div/a/div/p[1]/text()').extract())
item['title'] = video_name # 拿下标题
part_number = urls.split('/av')[1]
cid = requests.get(url=urls).text # 用来组成api
need_part = re.findall('","cid":(.*?),"', cid, re.S)
need_part = ''.join(need_part)
barrage_api = 'https://api.bilibili.com/x/v1/dm/list.so?oid=' + need_part
collection_api = 'https://api.bilibili.com/x/web-interface/archive/' + 'stat?aid=' + part_number
watching_url = 'https://api.bilibili.com/x/player.so?id=cid%3A' + need_part + '&aid=' + part_number + '&buvid=D7512C54-9EB9-4D8A-ADF9-040A66C06A6C190950infoc'
item['barrage_api'] = ''.join(barrage_api)
item['watching_url'] = ''.join(watching_url)
item['collection_api'] = ''.join(collection_api)
yield scrapy.Request(url=item['collection_api'], callback=self.collection, meta={'item': item})
button = self.bro.find_element_by_xpath('//*[@class="btn next"]') # 翻页按钮
button.click()
time.sleep(1)
self.page += 1
self.bro.quit()
你使用的是scrapy??
翻页按钮被点击的地方,添加一个print输出,看看到底有没有点击。
还有,self。page的原始值是1,结束条件是self.page<3,所以self。page=3的时候,就结束了