代码如下:
import requests
import scrapy
import bs4
from crawler51job.items import Crawler51JobItem
from scrapy.http import Request
class Spider51jobSpider(scrapy.Spider):
    """Scrape Python job listings from 51job search result pages.

    The crawl starts at result page 1 and follows result pages 2 through 4.
    For each fetched page a single ``Crawler51JobItem`` is yielded whose
    fields are lists holding every matching value found on that page.
    """

    name = 'spider51job'
    # All crawled URLs are on search.51job.com. Listing only 'www.51job.com'
    # makes Scrapy's offsite filtering discard the pagination requests, so
    # the parent domain is used to cover every 51job subdomain.
    allowed_domains = ['51job.com']
    # Search URL pattern; ``{page}`` is the result page number.
    search_url_template = (
        'https://search.51job.com/list/'
        '000000,000000,0000,00,9,99,python,2,{page}.html'
    )
    start_urls = [search_url_template.format(page=1)]

    def parse(self, response):
        """Extract job fields from one result page and queue further pages.

        Args:
            response: The downloaded search result page.

        Yields:
            One ``Crawler51JobItem`` with ``position``, ``company``,
            ``place`` and ``salary`` lists for this page, followed by
            ``Request`` objects for result pages 2-4.
        """
        item = Crawler51JobItem()
        item['position'] = response.xpath(
            '//p[@class="t1"]/span/a/@title').extract()
        item['company'] = response.xpath(
            '//span[@class="t2"]/a/@title').extract()
        item['place'] = response.xpath(
            '//div[@class="el"]/span[@class="t3"]/text()').extract()
        item['salary'] = response.xpath(
            '//div[@class="el"]/span[@class="t4"]/text()').extract()
        yield item

        # The page number must be substituted into the URL; the previous
        # version kept '"+str(i)+"' inside the string literal, so the same
        # malformed URL was requested on every iteration.
        # NOTE(review): these requests are re-yielded from every parsed page;
        # this relies on Scrapy's request de-duplication to avoid refetching
        # -- confirm the default dupefilter is enabled in settings.
        for page in range(2, 5):
            url = self.search_url_template.format(page=page)
            yield Request(url, callback=self.parse)
不知道你这个问题是否已经解决，如果还没有解决的话：