运用 XPath 进行爬取，但每次运行都只能得到一条数据。

目标网站http://www.piaofang.biz/
爬取页面所有电影名及票房
保存csv
import requests
from lxml import etree
import csv
# Asker's original script. Only the loop indentation that was lost in the
# paste is restored; the logic is kept as posted because it reproduces the
# "only one record comes out" symptom being asked about.
url = 'http://www.piaofang.biz/'
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Mobile Safari/537.36'}
response = requests.get(url, headers=headers)
# Force the charset used when decoding response.text.
response.encoding = 'gb2312'
data = response.text
html = etree.HTML(data)
div_tag = html.xpath('//div[@class="zhuti"]')
lst = []
for div in div_tag:
    dic = {}
    # Each query returns a list of every matching text node under this div.
    title = div.xpath('.//td[@class="title"]/a/text()')
    piaofang = div.xpath('.//td[@class="piaofang"]/span/text()')
    # Only index 0 of each list is kept, so every div contributes exactly
    # one record no matter how many titles/box-office values were matched.
    dic['title'] = title[0]
    dic['piaofang'] = piaofang[0]
    print(dic)
    lst.append(dic)

原因是列表 div_tag 只有一项，所以 for 循环只执行一次；而变量 title、piaofang 各有很多项，原代码却只取了它们的第 [0] 项。
改成这样:

import requests
from lxml import etree
import csv
# Fetch the page and collect one {'title', 'piaofang'} record per movie.
url = 'http://www.piaofang.biz/'
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Mobile Safari/537.36'}
response = requests.get(url, headers=headers)
# Force the charset used when decoding response.text.
response.encoding = 'gb2312'
data = response.text
html = etree.HTML(data)
div_tag = html.xpath('//div[@class="zhuti"]')
div = div_tag[0]  # div_tag has only one item (raises IndexError if none matched)
lst = []
title = div.xpath('.//td[@class="title"]/a/text()')
piaofang = div.xpath('.//td[@class="piaofang"]/span/text()')
for t, p in zip(title, piaofang):  # zip() iterates both lists in lockstep
    # Build a fresh dict for every movie. Reusing a single dict created
    # outside the loop would append the same object each time, leaving lst
    # filled with identical references that all show the last movie.
    dic = {'title': t, 'piaofang': p}
    lst.append(dic)
    print(dic)

如果有帮助, 点个「采纳」按钮不谢

你原来的代码里，字典每次只保存了一组键值对（title[0] 和 piaofang[0]），所以结果只有一条。我改好了，不清楚的地方可以问。

img

import requests
from lxml import etree
import csv
# Fetch the page and collect one {'title', 'piaofang'} record per movie.
url = 'http://www.piaofang.biz/'
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Mobile Safari/537.36'}
response = requests.get(url, headers=headers)
# Force the charset used when decoding response.text.
response.encoding = 'gb2312'
data = response.text
html = etree.HTML(data)
div_tag = html.xpath('//div[@class="zhuti"]')

lst = []
for div in div_tag:
    title = div.xpath('.//td[@class="title"]/a/text()')
    piaofang = div.xpath('.//td[@class="piaofang"]/span/text()')
    # =============================== modified
    # Pair each title with the box-office value at the same position and
    # store every pair in its own dict. Two separate loops writing into one
    # shared dict would keep only the last title and append the same object
    # repeatedly.
    for t, p in zip(title, piaofang):
        lst.append({'title': t, 'piaofang': p})
    # =====================================
print(lst)