How can this #Scrapy# problem be solved?

When crawling data with Scrapy and saving it to a database, the run fails with Message: 'Scraper close failure'.
Full error log:

     
    Traceback (most recent call last):
      File "F:\python\Lib\logging\__init__.py", line 1110, in emit
        msg = self.format(record)
              ^^^^^^^^^^^^^^^^^^^
      File "F:\python\Lib\logging\__init__.py", line 953, in format
        return fmt.format(record)
               ^^^^^^^^^^^^^^^^^^
      File "F:\python\Lib\logging\__init__.py", line 695, in format
        record.exc_text = self.formatException(record.exc_info)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "F:\python\Lib\logging\__init__.py", line 645, in formatException
        traceback.print_exception(ei[0], ei[1], tb, None, sio)
      File "F:\python\Lib\traceback.py", line 124, in print_exception
        te = TracebackException(type(value), value, tb, limit=limit, compact=True)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "F:\python\Lib\traceback.py", line 690, in __init__
        self.stack = StackSummary._extract_from_extended_frame_gen(
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "F:\python\Lib\traceback.py", line 416, in _extract_from_extended_frame_gen
        for f, (lineno, end_lineno, colno, end_colno) in frame_gen:
      File "F:\python\Lib\traceback.py", line 353, in _walk_tb_with_full_positions
        positions = _get_code_position(tb.tb_frame.f_code, tb.tb_lasti)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "F:\python\Lib\traceback.py", line 366, in _get_code_position
        positions_gen = code.co_positions()
                        ^^^^^^^^^^^^^^^^^
    AttributeError: '_Code' object has no attribute 'co_positions'
    Call stack:
      File "", line 198, in _run_module_as_main
      File "", line 88, in _run_code
      File "F:\python\Scripts\scrapy.exe\__main__.py", line 7, in 
        sys.exit(execute())
      File "F:\python\Lib\site-packages\scrapy\cmdline.py", line 158, in execute
        _run_print_help(parser, _run_command, cmd, args, opts)
      File "F:\python\Lib\site-packages\scrapy\cmdline.py", line 111, in _run_print_help
        func(*a, **kw)
      File "F:\python\Lib\site-packages\scrapy\cmdline.py", line 166, in _run_command
        cmd.run(args, opts)
      File "F:\python\Lib\site-packages\scrapy\commands\crawl.py", line 31, in run
        self.crawler_process.start()
      File "F:\python\Lib\site-packages\scrapy\crawler.py", line 383, in start
        reactor.run(installSignalHandlers=False)  # blocking call
      File "F:\python\Lib\site-packages\twisted\internet\asyncioreactor.py", line 255, in run
        self._asyncioEventloop.run_forever()
      File "F:\python\Lib\asyncio\base_events.py", line 607, in run_forever
        self._run_once()
      File "F:\python\Lib\asyncio\base_events.py", line 1919, in _run_once
        handle._run()
      File "F:\python\Lib\asyncio\events.py", line 80, in _run
        self._context.run(self._callback, *self._args)
      File "F:\python\Lib\site-packages\twisted\internet\asyncioreactor.py", line 271, in _onTimer
        self.runUntilCurrent()
      File "F:\python\Lib\site-packages\twisted\internet\base.py", line 991, in runUntilCurrent
        call.func(*call.args, **call.kw)
      File "F:\python\Lib\site-packages\scrapy\utils\reactor.py", line 54, in __call__
        return self._func(*self._a, **self._kw)
      File "F:\python\Lib\site-packages\scrapy\core\engine.py", line 172, in _next_request
        self._spider_idle()
      File "F:\python\Lib\site-packages\scrapy\core\engine.py", line 396, in _spider_idle
        self.close_spider(self.spider, reason=ex.reason)
      File "F:\python\Lib\site-packages\scrapy\core\engine.py", line 424, in close_spider
        dfd.addErrback(log_failure("Scraper close failure"))
      File "F:\python\Lib\site-packages\twisted\internet\defer.py", line 516, in addErrback
        return self.addCallbacks(
      File "F:\python\Lib\site-packages\twisted\internet\defer.py", line 477, in addCallbacks
        self._runCallbacks()
      File "F:\python\Lib\site-packages\twisted\internet\defer.py", line 857, in _runCallbacks
        current.result = callback(  # type: ignore[misc]
      File "F:\python\Lib\site-packages\scrapy\core\engine.py", line 414, in errback
        logger.error(
    Message: 'Scraper close failure'
    Arguments: ()

pipelines.py:

```python
import openpyxl
from itemadapter import ItemAdapter

import pymysql


class DbPipeline(object):

    def __int__(self):
        self.conn = pymysql.connect(host='localhost', user='root', password='123456',
                                    database='movie_douban_top250', charset='utf8mb4')
        self.cursor = self.conn.cursor()

    def close_spider(self, spider):
        self.conn.commit()
        self.conn.close()

    def process_item(self, item, spider):
        title = item.get('title', '')
        quote = item.get('quote', '')
        marks = item.get('marks', '')
        ranks = item.get('ranks', 0)
        self.cursor.execute(
            'insert into movie_douban_top250 (title, ranks, marks, quote) values (%s, %s, %s, %s)',
            (title, ranks, marks, quote)
        )
        return item
```

douban.py:

```python
import scrapy
from scrapy import Selector, Request

from ..items import MovieItem


class DoubanSpider(scrapy.Spider):
    name = "douban"
    allowed_domains = ["movie.douban.com"]

    def start_requests(self):
        for page in range(10):
            yield Request(url=f'https://movie.douban.com/top250?start={page*25}&filter=')

    def parse(self, response):
        sel = Selector(response)
        list_items = sel.css('.grid_view > li')
        for list_item in list_items:
            movie_item = MovieItem()
            movie_item['title'] = list_item.css('div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > a:nth-child(1) > span:nth-child(1)::text').extract_first()
            movie_item['quote'] = list_item.css('div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > p:nth-child(3) > span:nth-child(1)::text').extract_first()
            movie_item['marks'] = list_item.css('div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > div:nth-child(2) > span:nth-child(4)::text').extract_first()
            movie_item['ranks'] = list_item.css('div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > div:nth-child(2) > span:nth-child(2)::text').extract_first()
            yield movie_item
            hrefs_list = sel.css('div.paginator > a::attr(href)')
            for href in hrefs_list:
                url = response.urljoin(href.extract())
                yield Request(url=url)
```

Not sure whether you have solved this yet; if not, here are some pointers:
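
The two most useful clues in the log are the final `Message: 'Scraper close failure'` and the fact that the `AttributeError: '_Code' object has no attribute 'co_positions'` is raised inside the *logging* machinery: Scrapy is trying to log an exception that occurred while closing the scraper, the traceback formatter itself fails (which looks like an environment quirk rather than the real problem), and so the underlying exception is never printed. Judging from the posted `pipelines.py`, the most likely root cause is the method name `def __int__(self)`, which should be `__init__`. Because of the typo the database connection is never created, so every call to `process_item` raises `AttributeError` on `self.cursor`, and `close_spider` raises again on `self.conn` when the spider shuts down, which is exactly what the engine reports as "Scraper close failure".

A corrected sketch of the pipeline (connection parameters taken from the question; moving the setup into `open_spider` and explicitly closing the cursor are conventional choices, not something the original code required):

```python
import pymysql


class DbPipeline:

    def open_spider(self, spider):
        # The original code defined __int__ instead of __init__, so the
        # connection was never opened. open_spider runs once when the spider
        # starts, which makes it a natural place for this setup.
        self.conn = pymysql.connect(
            host='localhost',
            user='root',
            password='123456',
            database='movie_douban_top250',
            charset='utf8mb4',
        )
        self.cursor = self.conn.cursor()

    def close_spider(self, spider):
        # Commit any pending rows, then release the cursor and the connection.
        self.conn.commit()
        self.cursor.close()
        self.conn.close()

    def process_item(self, item, spider):
        title = item.get('title', '')
        quote = item.get('quote', '')
        marks = item.get('marks', '')
        ranks = item.get('ranks', 0)
        self.cursor.execute(
            'insert into movie_douban_top250 (title, ranks, marks, quote) '
            'values (%s, %s, %s, %s)',
            (title, ranks, marks, quote),
        )
        return item
```

Also make sure the pipeline is actually enabled in `settings.py`; the package name below is a placeholder, replace it with your own project name:

```python
# settings.py -- 'your_project' is a placeholder for the Scrapy project package
ITEM_PIPELINES = {
    'your_project.pipelines.DbPipeline': 300,
}
```

A side note on `douban.py`: `start_requests` already yields all ten listing pages, and the pagination block at the bottom of `parse` sits inside the per-item loop, so it mostly produces duplicate requests that the scheduler filters out; it can simply be removed once the pipeline error is fixed.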

If you have already solved the problem, it would be great if you could share your solution, write it up as a blog post, and leave the link in the comments to help more people ^-^