raise etree.ParserError( lxml.etree.ParserError: Document is empty

原始代码

import requests 
import lxml.html 
import csv 

doubanUrl = 'https://movie.douban.com/top250?start={}&filter='

def getSource(url):
    response = requests.get(url)
    response.encoding = 'utf-8'
    return response.content 
def getEveryItem(source):
    selector = lxml.html.document_fromstring(source)
    movieitemlist = selector.Xpath('//div[@class="info"]')
    movieList = []
    
    for eachMovie in movieitemlist:
        movieDict = {}
        title = eachMovie.Xpath('div[@class="hd"/a/span[@class="title"]/text()')
        otherTitle = eachMovie.Xpath('div[@class="hd"/a/span[@class="other"]/text()')
        link = eachMovie.Xpath('div[@class="hd"/a/@href')
        star = eachMovie.Xpath('div[@class="bd"/div[@class="star"]/span[@class="rating_num"]/text()')
        quote = eachMovie.Xpath('div[@class="bd"/p[@class="quote"]/span/text()')

        movieDict['title'] = ''.join(title+otherTitle)
        movieDict['url'] = link
        movieDict['star'] = star
        movieDict['quote'] = quote
        print(movieDict)
        movieList.append(movieDict)
    return movieList

def writeData(movieList):
    with open('MovieDouban.csv','w',encoding='UTF-8') as f:
        writer = csv.DictWriter(f,fieldnames=['title','star','quote','url']) 
        writer.writeheader() 
        for each in movieList:
            write.writerow(each)

if __name__=='__main__': 
    movieList = []
    for i in range(10):

        pageLink = doubanUrl.format(i * 25)
        print(pageLink)
        source = getSource(pageLink) 
        movieList += getEveryItem(source)
        #movieList = movieList + getEveryItem(source)
    print(movieList[:10])

    writeData(movieList)

报错如下

C:\Users\abc\AppData\Local\Programs\Python\Python38-32\python.exe C:/Users/abc/.PyCharmCE2019.3/config/scratches/scratch_1.py
https://movie.douban.com/top250?start=0&filter=
Traceback (most recent call last):
  File "C:/Users/abc/.PyCharmCE2019.3/config/scratches/scratch_1.py", line 63, in <module>
    movieList += getEveryItem(source)
  File "C:/Users/abc/.PyCharmCE2019.3/config/scratches/scratch_1.py", line 18, in getEveryItem
    selector = lxml.html.document_fromstring(source)
  File "C:\Users\abc\AppData\Local\Programs\Python\Python38-32\lib\site-packages\lxml\html\__init__.py", line 763, in document_fromstring
    raise etree.ParserError(
lxml.etree.ParserError: Document is empty


Process finished with exit code 1

系统报错该怎么解决?

其实你这个是网址设置了反爬,你可以试一下这个代码
import requests
url = 'https://movie.douban.com/top250?start=0&filter='
response = requests.get(url)
print(response)
返回值是 Response [418],说明你请求失败了
你的第一个函数getSource的返回值格式不管是responsr.text还是response.content,其结果都为空,既然getSource的返回值为空,那么后面第二个函数getEveryItem中selector的赋值自然就会报错,因为在 lxml.html.document_fromstring()的源代码中说了()里不能为空。
所以解决的方法就是设置请求头header

def getSource(url):
response = requests.get(url)
response.encoding = 'utf-8'
return response.content

这里调试下,看看返回的是不是合法完整的xml