import requests
from lxml import etree
import urllib
# HTTP headers passed to requests.get by get_url: a Windows desktop
# Edge (Chromium) User-Agent string and a Bing referer.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/93.0.4577.63 Safari/537.36 Edg/93.0.961.47'
    ),
    'Referer': 'https://cn.bing.com/',
}
def get_url(url, timeout=10):
    """Download ``url`` using the module-level ``headers`` and return its text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests may block indefinitely on an unresponsive host.

    Returns:
        The response body as ``str``, decoded by requests.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP 4xx/5xx status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to
    # the parser, which would silently yield no entries.
    response.raise_for_status()
    return response.text
def resolve(response):
    """Parse one listing page, print each entry, and return the entries.

    Args:
        response: HTML text of a listing page.

    Returns:
        A list with one dict per entry, keyed by '标题' (title),
        '日期' (date) and '连接' (link). All whitespace is removed from
        every value. The original implementation returned nothing, so
        callers that ignore the return value are unaffected.
    """
    page = etree.HTML(response)
    # The element wildcard "*" is required: '//[@id=...]' is not valid XPath.
    titles = page.xpath('//*[@id="navjz"]/ul/li/a/span/text()')
    dates = page.xpath('//*[@id="navjz"]/ul/li/span/text()')
    links = page.xpath('//*[@id="navjz"]/ul/li/a/@href')

    field_names = ('标题', '日期', '连接')
    rows = []
    # NOTE: the three result lists are paired positionally and zip stops at
    # the shortest one, so an entry missing a field would shift the pairing.
    for entry in zip(titles, dates, links):
        # "".join(text.split()) drops every whitespace character, including
        # interior ones, exactly like stripping the string char by char.
        row = {
            name: "".join(text.split())
            for name, text in zip(field_names, entry)
        }
        for key, value in row.items():
            print(f"{key}: {value}")
        rows.append(row)
    return rows
def main():
    """Ask for a start and end page number and scrape every page in between.

    Both bounds are read from standard input and the range is inclusive.

    Raises:
        ValueError: If either input is not an integer.
    """
    start = int(input("输入开始页码"))
    end = int(input("输入结束页码"))
    for i in range(start, end + 1):
        print(f'爬取中第{i}页')
        # The page number is interpolated into the query string; the
        # original line was an unterminated string literal.
        url = f"http://sjy.mas.gov.cn/content/column/4716972?pageIndex={i}"
        re_url = get_url(url=url)
        resolve(response=re_url)
        print(f'\n第{i}页爬取完成')
# Run the scraper only when this file is executed as a script, not when it
# is imported. The dunder names are required: bare `name` is undefined.
if __name__ == '__main__':
    main()
源代码在这里,想把数据存储到Excel中,请问该添加哪些代码?
用 Python 操作 Excel 的方法,可以参考下面这篇文章:
https://www.cnblogs.com/zhoujie/p/python18.html
你好,如果还没有学过用 Python 操作 Excel,或者暂时不会,可以先把数据存为 CSV 文件:Excel 可以直接打开或导入 CSV 文件,CSV 文件的内容也可以直接导入数据库。下面这个例子清楚地演示了如何存为 CSV 文件,你按照这个思路改写到自己的代码中就行!
有帮助的话采纳一下哦!
# Example: write scraped records to a CSV file that Excel can open.
# The bare `...` lines mark code elided from the example; `comments` and
# `id` are presumably defined in that elided code -- confirm before reuse.
import csv  # needed for csv.DictWriter below

# "utf-8-sig" prepends a byte-order mark so Excel recognises the file as
# UTF-8; with plain "utf-8" Excel may display the Chinese text as mojibake.
# newline='' lets the csv module control line endings itself.
with open("商品数据.csv", "w", encoding="utf-8-sig", newline='') as csvfile:
    fieldnames = ["用户名", "用户等级", "评价星级", "评论内容", "产品颜色", "产品名称", "评价时间"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    ...
    # range(0, 121) yields page indices 0 through 120 (the end is exclusive).
    for i in range(0, 121):
        ...
        for c in comments:
            temp = {}
            temp['用户名'] = c['nickname']  # user name
            temp['用户等级'] = c['plusAvailable']  # user level
            temp['评价星级'] = c['score']  # star rating
            # Newlines are removed from the review text before writing.
            temp['评论内容'] = c['content'].replace("\n", "")  # review text
            temp['产品颜色'] = c['productColor']  # product colour
            temp['产品名称'] = c['referenceName']  # product name
            temp['评价时间'] = c['creationTime']  # review time
            writer.writerow(temp)
        print(f"id={id}的商品第{i+1}页存储完成!!!")
百度一下,py操作数据库。