import json
import logging
import scrapy
from news.items import NewsItem
class AbcSpider(scrapy.Spider):
    """Crawl the Gelonghui home-page article feed through its JSON API.

    Each response is one JSON "page" of articles; after yielding an item
    per article, the spider requests the next (older) page by passing the
    oldest timestamp seen so far back to the same endpoint.
    """

    name = 'abc'
    allowed_domains = ['gelonghui.com']
    start_urls = ['https://www.gelonghui.com/api/channels/web_home_page/articles/v8?']

    def parse(self, response):
        """Parse one JSON page: yield a NewsItem per article, then
        schedule the request for the next page.

        :param response: Scrapy response whose body is the API's JSON.
        :yields: ``NewsItem`` objects and one follow-up ``FormRequest``.
        """
        payload = json.loads(response.text)
        results = payload.get('result') or []
        for entry in results:
            # A fresh item per article: reusing a single NewsItem instance
            # across iterations would let later articles overwrite data
            # still sitting in Scrapy's asynchronous item pipeline.
            item = NewsItem()
            item['title'] = entry['data']['title']
            # Debug-level: per-item output is routine, not CRITICAL.
            logging.debug(item)
            yield item
        if not results:
            # Empty page: the original hard-coded [14] would raise
            # IndexError here; treat it as "no more pages" instead.
            return
        # The feed pages backwards in time: ask for articles strictly
        # older than the last (oldest) article on this page. Using [-1]
        # also works when the API returns fewer than 15 results.
        next_timestamp = results[-1]['data']['timestamp'] - 1
        url_ajax = 'https://www.gelonghui.com/api/channels/web_home_page/articles/v8?'
        data = {
            'timestamp': str(next_timestamp),
            'loaded': '15',
        }
        # dont_filter: every request hits the same URL with different
        # query params, so the duplicate filter must be bypassed.
        yield scrapy.FormRequest(url=url_ajax,
                                 method='get',
                                 formdata=data,
                                 dont_filter=True,
                                 callback=self.parse,
                                 )
```python
```
可以用meta累计请求次数,在后续的请求中获取请求次数。
def parse(self, response):
    """Illustrative fragment (from the answer text, not runnable: note the
    literal ``......`` placeholder): cap/count follow-up requests by
    carrying a counter through the request's ``meta`` dict."""
    # First call: response.meta carries nothing useful yet, so start
    # the counter at zero.
    meta = {'crawlCnt': 0}
    if response.meta:
        # Subsequent calls: reuse the dict that travelled with the
        # request so the count persists across callbacks.
        meta = response.meta
    meta['crawlCnt'] += 1
    ......
    # Attach the counter to the next request via meta=... so this same
    # callback can read it on the following page.
    yield scrapy.FormRequest(url=url_ajax,
                             method='get',
                             formdata=data,
                             dont_filter=True,
                             meta=meta,
                             callback=self.parse
                             )
最简单的办法(可能不太好用)是把你爬取的标题存入一个列表,每次判断这个列表的长度,达到你想要的数量时就结束函数。