用 XPath 解析页面时遇到了打印乱码的问题:
第一种情况是搜索国内电影时结果出现乱码:
import requests
from lxml import etree
import prettytable as pt
from urllib.parse import quote
import sys
# Response module
def response_url(html_url, encoding="gbk", timeout=10):
    """Fetch *html_url* with HTTP GET and return the response.

    Args:
        html_url: URL to request.
        encoding: Codec used when decoding ``response.text``. Defaults to
            ``"gbk"``; pass ``None`` to fall back to the codec guessed from
            the body (``response.apparent_encoding``).
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        The ``requests.Response`` object with ``encoding`` already set.
    """
    response = requests.get(html_url, timeout=timeout)
    # Content-based detection can mis-guess the codec and garble Chinese
    # text, so an explicit codec is preferred over apparent_encoding.
    response.encoding = encoding or response.apparent_encoding
    return response
# Data parsing module
def xml(response_text):
    """Print a numbered table of movies and return the URL the user selects.

    Args:
        response_text: HTML text of the index page.

    Returns:
        The absolute URL of the movie whose number the user typed.

    Raises:
        ValueError: If the typed value is not an integer.
        IndexError: If the typed number does not match a listed movie.
    """
    # Local import so this function does not depend on the file-level imports.
    from urllib.parse import urljoin

    base_url = 'https://dy.dytt8.net'
    # Listing stops at the first entry carrying this title.
    stop_title = '德凯奥特曼 第01话'

    html = etree.HTML(response_text)
    anchors = html.xpath('//div[@class="co_content8"]/ul//a[2]')
    tb = pt.PrettyTable()
    tb.field_names = ["序号", "名字"]
    movie_url = []
    for item in anchors:
        if item.text == stop_title:
            break
        # Read the attribute directly instead of stringifying the list that
        # xpath('@href') returns and stripping the brackets afterwards.
        href = item.get('href')
        if not href:
            # An anchor without a link cannot be opened; leave it out.
            continue
        # The row number is the index the URL will have in movie_url.
        tb.add_row([len(movie_url), item.text])
        movie_url.append(urljoin(base_url, href))
    print(tb)
    num = input('输入你想查看的电影序号:')
    return movie_url[int(num)]
# Detail page parsing module
def movies_xml(movies_url):
    """Fetch a movie detail page and print selected text plus download links.

    For every ``<div id="Zoom">`` on the page this prints, one per line, the
    text selected by ``.//text()[3]``, ``.//text()[2]`` and ``.//text()[4]``
    (presumably title, translated title and release time, judging by the
    variable names; verify against the live page), followed by all ``href``
    values found inside the div.

    Args:
        movies_url: URL of the movie detail page.

    Returns:
        None. The result is written to standard output.
    """
    page_text = response_url(movies_url).text
    page_html = etree.HTML(page_text)
    movies = []
    for item in page_html.xpath('//div[@id="Zoom"]'):
        movies_trans = item.xpath('.//text()[2]')
        movies_name = item.xpath('.//text()[3]')
        movies_time = item.xpath('.//text()[4]')
        # Kept in its own variable so the movies_url parameter is not
        # overwritten inside the loop.
        download_links = item.xpath('.//@href')
        movies.append(''.join(movies_name))
        movies.append(''.join(movies_trans))
        movies.append(''.join(movies_time))
        # Join the links as plain text rather than printing the list repr.
        movies.append('下载连接:' + ', '.join(download_links))
    for line in movies:
        print(line)
# Search module
def search_html(key_name):
    """Search the site for *key_name* and print the text of each result link.

    Args:
        key_name: Search keyword typed by the user.

    Returns:
        None. Each result is printed on its own line.

    Raises:
        UnicodeEncodeError: If *key_name* contains characters that cannot be
            encoded as GBK.
    """
    # The keyword is percent-encoded from its GBK bytes, not UTF-8.
    key_word = quote(key_name.encode('gbk'))
    response = response_url(
        f'http://s.ygdy8.com/plus/so.php?typeid=1&keyword={key_word}'
    )
    # Decode the result page as GBK explicitly; relying on the guessed codec
    # is what produced garbled titles for Chinese search results.
    response.encoding = 'gbk'
    search_page = etree.HTML(response.text)
    for anchor in search_page.xpath('//div[@class="co_content8"]/ul//a'):
        print(''.join(anchor.xpath('.//text()')))
# Main function
def main(num):
    """Run either the browse flow or the search flow.

    Args:
        num: Mode string typed by the user. The value '查看' selects the
            browse flow; any other value selects the search flow.
    """
    if num != '查看':
        # Search flow: ask for a keyword and print the matching titles.
        search_html(input("输入你想要搜素的电影名称(不低于三个字):"))
        return

    # Browse flow: list the index page, let the user pick, show the details.
    index_page = response_url('https://dy.dytt8.net/index2.htm')
    chosen_url = xml(index_page.text)
    movies_xml(chosen_url)
if __name__ == '__main__':
    # Ask which mode to run, then hand the answer to main().
    mode = input("(查看2022最新电影输入查看/使用搜索功能输入搜索):")
    main(num=mode)
但是两次搜索得到的结果却不一样。
把响应的编码改成 gbk 即可解决:
response.encoding = 'gbk'