When I use Scrapy to crawl data and save it to a database, the run fails with Message: 'Scraper close failure'.
Full error log:
Traceback (most recent call last):
File "F:\python\Lib\logging\__init__.py", line 1110, in emit
msg = self.format(record)
^^^^^^^^^^^^^^^^^^^
File "F:\python\Lib\logging\__init__.py", line 953, in format
return fmt.format(record)
^^^^^^^^^^^^^^^^^^
File "F:\python\Lib\logging\__init__.py", line 695, in format
record.exc_text = self.formatException(record.exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "F:\python\Lib\logging\__init__.py", line 645, in formatException
traceback.print_exception(ei[0], ei[1], tb, None, sio)
File "F:\python\Lib\traceback.py", line 124, in print_exception
te = TracebackException(type(value), value, tb, limit=limit, compact=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "F:\python\Lib\traceback.py", line 690, in __init__
self.stack = StackSummary._extract_from_extended_frame_gen(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "F:\python\Lib\traceback.py", line 416, in _extract_from_extended_frame_gen
for f, (lineno, end_lineno, colno, end_colno) in frame_gen:
File "F:\python\Lib\traceback.py", line 353, in _walk_tb_with_full_positions
positions = _get_code_position(tb.tb_frame.f_code, tb.tb_lasti)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "F:\python\Lib\traceback.py", line 366, in _get_code_position
positions_gen = code.co_positions()
^^^^^^^^^^^^^^^^^
AttributeError: '_Code' object has no attribute 'co_positions'
Call stack:
File "", line 198, in _run_module_as_main
File "", line 88, in _run_code
File "F:\python\Scripts\scrapy.exe\__main__.py", line 7, in
sys.exit(execute())
File "F:\python\Lib\site-packages\scrapy\cmdline.py", line 158, in execute
_run_print_help(parser, _run_command, cmd, args, opts)
File "F:\python\Lib\site-packages\scrapy\cmdline.py", line 111, in _run_print_help
func(*a, **kw)
File "F:\python\Lib\site-packages\scrapy\cmdline.py", line 166, in _run_command
cmd.run(args, opts)
File "F:\python\Lib\site-packages\scrapy\commands\crawl.py", line 31, in run
self.crawler_process.start()
File "F:\python\Lib\site-packages\scrapy\crawler.py", line 383, in start
reactor.run(installSignalHandlers=False) # blocking call
File "F:\python\Lib\site-packages\twisted\internet\asyncioreactor.py", line 255, in run
self._asyncioEventloop.run_forever()
File "F:\python\Lib\asyncio\base_events.py", line 607, in run_forever
self._run_once()
File "F:\python\Lib\asyncio\base_events.py", line 1919, in _run_once
handle._run()
File "F:\python\Lib\asyncio\events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "F:\python\Lib\site-packages\twisted\internet\asyncioreactor.py", line 271, in _onTimer
self.runUntilCurrent()
File "F:\python\Lib\site-packages\twisted\internet\base.py", line 991, in runUntilCurrent
call.func(*call.args, **call.kw)
File "F:\python\Lib\site-packages\scrapy\utils\reactor.py", line 54, in __call__
return self._func(*self._a, **self._kw)
File "F:\python\Lib\site-packages\scrapy\core\engine.py", line 172, in _next_request
self._spider_idle()
File "F:\python\Lib\site-packages\scrapy\core\engine.py", line 396, in _spider_idle
self.close_spider(self.spider, reason=ex.reason)
File "F:\python\Lib\site-packages\scrapy\core\engine.py", line 424, in close_spider
dfd.addErrback(log_failure("Scraper close failure"))
File "F:\python\Lib\site-packages\twisted\internet\defer.py", line 516, in addErrback
return self.addCallbacks(
File "F:\python\Lib\site-packages\twisted\internet\defer.py", line 477, in addCallbacks
self._runCallbacks()
File "F:\python\Lib\site-packages\twisted\internet\defer.py", line 857, in _runCallbacks
current.result = callback( # type: ignore[misc]
File "F:\python\Lib\site-packages\scrapy\core\engine.py", line 414, in errback
logger.error(
Message: 'Scraper close failure'
Arguments: ()
pipelines.py:

```python
import openpyxl
from itemadapter import ItemAdapter
import pymysql


class DbPipeline(object):
    def __int__(self):
        self.conn = pymysql.connect(host='localhost', user='root', password='123456',
                                    database='movie_douban_top250', charset='utf8mb4')
        self.cursor = self.conn.cursor()

    def close_spider(self, spider):
        self.conn.commit()
        self.conn.close()

    def process_item(self, item, spider):
        title = item.get('title', '')
        quote = item.get('quote', '')
        marks = item.get('marks', '')
        ranks = item.get('ranks', 0)
        self.cursor.execute(
            'insert into movie_douban_top250 (title, ranks, marks, quote) values (%s, %s, %s, %s)',
            (title, ranks, marks, quote)
        )
        return item
```

douban.py:
```python
import scrapy
from scrapy import Selector, Request
from ..items import MovieItem
class DoubanSpider(scrapy.Spider):
    name = "douban"
    allowed_domains = ["movie.douban.com"]

    def start_requests(self):
        for page in range(10):
            yield Request(url=f'https://movie.douban.com/top250?start={page*25}&filter=')

    def parse(self, response):
        sel = Selector(response)
        list_items = sel.css('.grid_view > li')
        for list_item in list_items:
            movie_item = MovieItem()
            movie_item['title'] = list_item.css('div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > a:nth-child(1) > span:nth-child(1)::text').extract_first()
            movie_item['quote'] = list_item.css('div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > p:nth-child(3) > span:nth-child(1)::text').extract_first()
            movie_item['marks'] = list_item.css('div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > div:nth-child(2) > span:nth-child(4)::text').extract_first()
            movie_item['ranks'] = list_item.css('div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > div:nth-child(2) > span:nth-child(2)::text').extract_first()
            yield movie_item
        hrefs_list = sel.css('div.paginator > a::attr(href)')
        for href in hrefs_list:
            url = response.urljoin(href.extract())
            yield Request(url=url)
```

Not sure whether you have already solved this; if not, here is a suggestion:
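
The most likely cause is in `DbPipeline`: the constructor is spelled `__int__` instead of `__init__`, so it is never called and `self.conn` / `self.cursor` are never created. The first call to `process_item` (and the `close_spider` that runs while the spider shuts down) then raises `AttributeError`, which Scrapy reports as 'Scraper close failure'. The long `co_positions` traceback appears to be only a secondary problem: on Python 3.11, older Twisted versions put stub `_Code` objects into cleaned-up failures that the `logging`/`traceback` modules cannot format, so the real exception gets swallowed; upgrading Twisted and Scrapy usually makes the underlying error visible again.

Below is a minimal corrected sketch of `pipelines.py`, not a drop-in replacement. It keeps the table name and connection parameters from your post, and it moves the connection setup into Scrapy's `open_spider` hook (a correctly spelled `__init__` would also work).

```python
import pymysql


class DbPipeline:
    def open_spider(self, spider):
        # Called once when the spider starts, so the connection exists
        # before any item reaches process_item.
        self.conn = pymysql.connect(
            host='localhost',
            user='root',
            password='123456',
            database='movie_douban_top250',
            charset='utf8mb4',
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        # Parameterised insert; the values come from the MovieItem fields
        # filled in by the spider (title, ranks, marks, quote).
        self.cursor.execute(
            'insert into movie_douban_top250 (title, ranks, marks, quote) '
            'values (%s, %s, %s, %s)',
            (
                item.get('title', ''),
                item.get('ranks', 0),
                item.get('marks', ''),
                item.get('quote', ''),
            ),
        )
        return item

    def close_spider(self, spider):
        # Commit once at the end and release the connection.
        self.conn.commit()
        self.conn.close()
```

Also make sure the pipeline is enabled in `settings.py`, e.g. `ITEM_PIPELINES = {"yourproject.pipelines.DbPipeline": 300}` (replace `yourproject` with your actual project package; the setting name is standard Scrapy, the module path here is just an example). Without that entry, `process_item` is never called and nothing is written to MySQL.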