XPath returns an empty list when crawling a static job-listing site

I'm using the Scrapy framework to crawl the static Qingzhou job-listing site (青州招聘网), but the XPath I located returns an empty list. A plain static-scraping test script with requests behaves the same way, yet Selenium can fetch the content. Is there a simple way to extract what the XPath points at without using Selenium? The test code is below:


```python
import requests
from lxml import etree

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}
url = 'http://www.0536qz.com/post/zhaopin/pn1/'
page_text = requests.get(url, headers=headers)  # request the listing page
page_text.encoding = 'gbk'
tree = etree.HTML(page_text.text)
title = tree.xpath('//*[@id="jobshow7683"]/div[1]/div[1]/span/em/text()')
print(title)  # -> [] (empty list)
```
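A quick way to confirm whether this is a rendering issue is to search the raw HTML that requests receives for the id used in the XPath (`jobshow7683`, taken from the snippet above); if it never appears, the element is generated by JavaScript after the page loads. A minimal check:

```python
import requests

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}
resp = requests.get('http://www.0536qz.com/post/zhaopin/pn1/', headers=headers)
resp.encoding = 'gbk'
# If the id seen in the browser never occurs in the raw HTML,
# the node is created client-side and XPath on the static page finds nothing.
print('jobshow7683' in resp.text)
```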
And the Scrapy spider:

```python
import scrapy
from zhaopin.items import ZhaopinItem

class ZhaopinspiderSpider(scrapy.Spider):
    name = 'zhaopinspider'  # unique spider name (required)
    # allowed_domains = ['zhaopin.com']
    start_urls = ['http://www.0536qz.com/post/zhaopin/pn1/']  # first listing page
    ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'

    def parse(self, response):  # parse the listing page and extract its content
        titles = response.xpath('/html/body/div[5]/div[3]/div[2]/ul/li/div[1]/div/a/p/text()').extract()  # job titles
        prices = response.xpath('/html/body/div[5]/div[3]/div[2]/ul/li/div[1]/p[1]/text()').extract()  # salaries
        detail_links = response.xpath('/html/body/div[5]/div[3]/div[2]/ul/li/div[1]/div/a/@href').extract()  # detail-page links
        base_url = 'http://www.0536qz.com'  # site root, for absolutizing relative hrefs
        for ind, detail_link in enumerate(detail_links):
            title = titles[ind]
            price = prices[ind]
            # pass data from the request to its response via meta
            yield scrapy.Request(url=base_url + detail_link, callback=self.parse_detail,
                                 meta={'title': title, 'price': price},
                                 headers={'User-Agent': self.ua})
        # follow the "next page" pagination link (outside the per-item loop)
        next_page = response.xpath('//*[@id="page_x"]/a[9]/@href').extract_first()
        if next_page:
            yield scrapy.Request(url=base_url + next_page, callback=self.parse, headers={'User-Agent': self.ua})
    def parse_detail(self, response):
        title = response.meta['title']  # job title
        price = response.meta['price']  # salary
        time = response.xpath('/html/body/div[5]/div[2]/div[2]/div[1]/span[1]/text()').extract_first()  # last updated
        attention = response.xpath('/html/body/div[5]/div[2]/div[2]/div[1]/span[2]/em/text()').extract_first()  # popularity
        job = response.xpath('//*[@id="baseInfo"]/ul[1]/li[1]/span[2]/text()').extract_first()  # position
        job_num = response.xpath('//*[@id="baseInfo"]/ul[1]/li[3]/span[2]/text()').extract_first()  # number of openings
        Education = response.xpath('//*[@id="baseInfo"]/ul[1]/li[2]/span[2]/text()').extract_first()  # education requirement
        contacts = response.xpath('//*[@id="baseInfo"]/ul[1]/li[5]/span[2]/text()').extract_first()  # contact person
        # fill a ZhaopinItem with the extracted fields
        item = ZhaopinItem()
        item['title'] = title
        item['time'] = time
        item['attention'] = attention
        item['price'] = price
        item['job'] = job
        item['job_num'] = job_num
        item['Education'] = Education
        item['contacts'] = contacts
        yield item
```
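As a side note on the spider itself, matching three parallel lists by index is fragile; a sketch of iterating each `<li>` once instead, reusing the same paths relative to it:

```python
# Sketch: per-item iteration with relative XPaths, so that title,
# price and link always come from the same <li> entry.
for li in response.xpath('/html/body/div[5]/div[3]/div[2]/ul/li'):
    title = li.xpath('./div[1]/div/a/p/text()').extract_first()
    price = li.xpath('./div[1]/p[1]/text()').extract_first()
    link = li.xpath('./div[1]/div/a/@href').extract_first()
```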


You are fetching with requests, and part of the data is probably loaded via AJAX, which is why your XPath matches nothing. The data that requests does receive has to be analyzed elsewhere: open the browser's DevTools Network panel and find the request that actually carries the job list.
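A minimal sketch of that approach, assuming the data request has already been located in the Network panel. The endpoint and parameter below are placeholders for illustration, not the site's real API:

```python
import requests

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}
# HYPOTHETICAL endpoint: replace with the real URL copied from the
# DevTools Network tab (the XHR/Fetch request that returns the job data).
api_url = 'http://www.0536qz.com/some/ajax/endpoint'
resp = requests.get(api_url, headers=headers, params={'page': 1})
resp.encoding = 'gbk'
print(resp.text[:500])  # inspect the payload: often JSON or an HTML fragment
```

If the endpoint returns JSON, `resp.json()` gives the records directly and no XPath is needed at all.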
