使用 Scrapy 爬取图片无法保存
item 能传入管道，但是图片保存不到本地文件夹
主代码
```python
import scrapy
from ..items import HgfhItem
class XiaoshuSpider(scrapy.Spider):
    """Spider that walks listing pages and emits image URLs as ``HgfhItem``.

    Callback chain: ``parse`` (listing page) -> ``parse_a`` (detail page,
    collects pagination links) -> ``parse_b`` (extracts ``<img src>`` values
    and yields one item per page).
    """

    name = 'xiaoshu'
    # allowed_domains = ['www.baidu.com']
    url = 'https://www.xintp.com/bizhi/shoujibizhi/page/{}/'
    cc = 2
    start_urls = [url.format(str(cc))]

    def parse(self, response):
        """Request every detail page linked from a listing page.

        Yields:
            scrapy.Request: one request per extracted href, handled by
            ``parse_a``.
        """
        hrefs = response.xpath('//*[@id="main"]/article/div/figure/span/a/@href').extract()
        for href in hrefs:
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones, which scrapy.Request would otherwise reject.
            yield scrapy.Request(url=response.urljoin(href), callback=self.parse_a, dont_filter=True)
        # Listing pagination, currently disabled:
        # if self.cc<=222 :
        #     self.cc+=1
        #     yield scrapy.Request(url=self.url.format(str(self.cc )),callback=self.parse,dont_filter= True )

    def parse_a(self, resource):
        """Fan out from a detail page to its pagination links.

        The first and last extracted links are skipped (presumably
        previous/next navigation -- confirm against the page markup). When
        fewer than two links are found the page is treated as having no
        further pages and is itself handed to ``parse_b``.

        Yields:
            scrapy.Request: requests handled by ``parse_b``.
        """
        links = resource.xpath('//*[@id="main"]/article/div/div[3]/a/@href').extract()
        # Explicit length check instead of relying on IndexError inside a
        # bare ``except`` that also wrapped ``yield`` (and therefore caught
        # GeneratorExit and hid unrelated errors).
        if len(links) < 2:
            print('没有下一页')
            yield scrapy.Request(url=resource.url, callback=self.parse_b, dont_filter=True)
            return
        for link in links[1:-1]:
            yield scrapy.Request(url=resource.urljoin(link), callback=self.parse_b, dont_filter=True)

    def parse_b(self, resource):
        """Collect the image URLs on one page into an ``HgfhItem``.

        Yields:
            HgfhItem: item whose ``file`` field is the list of absolute
            image URLs found on the page (possibly empty).
        """
        srcs = resource.xpath('//*[@id="main"]/article/div/div[2]/div/figure/a/img/@src | //*[@id="main"]/article/div/div[2]/div/p/a/img/@src').extract()
        itme = HgfhItem()
        # Make every src absolute so the image pipeline can request it.
        itme['file'] = [resource.urljoin(src) for src in srcs]
        yield itme
items代码
import scrapy
class HgfhItem(scrapy.Item):
    """Scrapy item with a single ``file`` field."""

    # Filled by the spider with the list of image URLs extracted from a page.
    file = scrapy.Field()
管道代码
from scrapy.pipelines.images import ImagesPipeline
import scrapy
from scrapy.exceptions import DropItem
class ImgePipeline(ImagesPipeline):
    """Images pipeline that downloads every URL listed in ``item['file']``."""

    def get_media_requests(self, item, info):
        """Yield one download request per URL in ``item['file']``."""
        print(item['file'])
        for url in item['file']:
            yield scrapy.Request(url=url, dont_filter=True)

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path for a downloaded image.

        The path is ``'meinv'`` followed by the last ``/``-separated segment
        of the request URL. ``item`` is accepted as a keyword-only argument
        because newer Scrapy releases pass it; it is not used here.
        """
        yi = 'meinv' + request.url.split('/')[-1]
        print(yi)
        return yi

    def item_completed(self, results, item, info):
        """Report failed downloads for this item, then pass the item on.

        Args:
            results: iterable of ``(success, value)`` pairs, one per request
                yielded by ``get_media_requests``.
            item: the item whose media requests have finished.
            info: pipeline bookkeeping object (unused).

        Returns:
            The unchanged ``item``.
        """
        # Local import keeps this block self-contained.
        import logging

        logger = logging.getLogger(__name__)
        for ok, outcome in results:
            if not ok:
                # Surface the failure instead of silently returning the item,
                # so "nothing was saved" can be diagnosed from the log.
                logger.warning('Image download failed: %s', outcome)
        return item

    def __del__(self):
        # Runs when the pipeline object is garbage-collected; it does not
        # indicate that any image was stored.
        print('ok')
设置
# Directory that receives the downloaded images.
IMAGES_STORE = "./hgfhimg"

# Enabled item pipelines, mapped to their order value.
ITEM_PIPELINES = {"hgfh.pipelines.ImgePipeline": 300}