Here is the code.
Spider code:
from textsc.items import TextscItem
from scrapy.selector import Selector
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor


class Baispider(CrawlSpider):
    name = "Baidu"
    allowed_domains = ["baidu.com"]
    start_urls = [
        "https://zhidao.baidu.com/list"
    ]
    rules = (
        Rule(LinkExtractor(allow=('/shop', ), deny=('fr', )), callback='parse_item'),
    )

    def parse_item(self, response):
        sel = Selector(response)
        items = []
        item = TextscItem()
        title = sel.xpath('//div[@class="shop-menu"]/ul/li/a/text()').extract()
        for i in title:
            items.append(i)
        item['TitleName'] = items
        print(item['TitleName'])
        return item
items.py code:
import scrapy
import json


class TextscItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    TitleName = scrapy.Field()
    pass
The Scrapy version is 1.4.0.
The crawl runs without any errors, but the JSON output file ends up completely empty.
Can anyone explain what is going wrong?
Thanks in advance!
First check whether your spider is scraping any data at all; turn on or inspect the log to see. Also verify that your crawl rule and extraction XPath actually match anything on the pages.
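For example, a quick way to check both is to try the XPath in scrapy shell and to log from inside the callback. The sketch below is only illustrative, based on the spider posted above; the URL and log messages are placeholders:

# Test the selector interactively first:
#   scrapy shell "https://zhidao.baidu.com/list"
#   response.xpath('//div[@class="shop-menu"]/ul/li/a/text()').extract()

# Then log from the callback so the crawl log shows whether it is ever called
# and how many titles it matched:
def parse_item(self, response):
    self.logger.info("parse_item called for %s", response.url)
    titles = response.xpath('//div[@class="shop-menu"]/ul/li/a/text()').extract()
    self.logger.info("matched %d titles", len(titles))
    item = TextscItem()
    item['TitleName'] = titles
    return item

If "parse_item called" never shows up in the log, the Rule (allow='/shop', deny='fr') is filtering out every link and the problem is the rule, not the XPath.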
You never actually save anything... For a start, you could call the urlretrieve function from the urllib module to save things to a local folder of your choice. The whole script only ever prints; how are you supposed to see any saved data?
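For what it's worth, since parse_item already returns the items, the usual ways to persist them in Scrapy are the built-in feed export or a small item pipeline. A minimal sketch, assuming the spider above; the file names and pipeline class are illustrative, not from the original project:

# Option 1: let Scrapy's feed export write the items for you
#   scrapy crawl Baidu -o titles.json

# Option 2: a minimal JSON-lines pipeline (enable it via ITEM_PIPELINES in settings.py)
import json

class JsonWriterPipeline(object):
    def open_spider(self, spider):
        # open one output file per crawl
        self.file = open('titles.jl', 'w')

    def close_spider(self, spider):
        self.file.close()

    def process_item(self, item, spider):
        # write each item as a single JSON line
        self.file.write(json.dumps(dict(item)) + '\n')
        return item

Either way only produces output if parse_item is actually called and returns items, which is why the previous answer's suggestion to check the log and the rule/XPath matching comes first.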