采集指定网站的菜价,如何以 Excel 表格的形式下载下来?

采集时只需要当天的价格(即最新发布日期的数据);需求是每天都要更新,最好做成一个文件,运行后自动下载。
采集网址http://www.xinfadi.com.cn/priceDetail.html


import requests
import openpyxl

# Request headers passed to the HTTP call in Spider.get_data; the only entry
# is a desktop Chrome User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/99.0.4844.51 Safari/537.36"
    ),
}


def write_data(json: dict, file_name: str):
    """
    Save the records found under ``json['list']`` to ``<file_name>.xlsx``.

    The workbook's active sheet receives one header row followed by one row
    per record. The file is written relative to the current working
    directory and a confirmation line is printed once it has been saved.

    :param json: mapping with a ``'list'`` entry holding the records; every
        record is indexed with the keys ``prodName``, ``lowPrice``,
        ``avgPrice``, ``highPrice``, ``specInfo``, ``place``, ``unitInfo``
        and ``pubDate``
    :param file_name: output file name without the ``.xlsx`` extension
    :return: None
    """
    # Looked up first so a missing 'list' entry fails before any workbook
    # object is created.
    records = json['list']
    # Record keys, in the same order as the header cells below.
    field_names = (
        "prodName",
        "lowPrice",
        "avgPrice",
        "highPrice",
        "specInfo",
        "place",
        "unitInfo",
        "pubDate",
    )
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    worksheet.append(["品名", "最低价", "平均价", "最高价", "规格", "产地", "单位", "发布日期"])
    for record in records:
        worksheet.append([record[name] for name in field_names])
    target = f"{file_name}.xlsx"
    workbook.save(target)
    print(f"{file_name}.xlsx 下载完成!")


class Spider:
    """Interactive downloader that exports one product category to Excel.

    ``download`` prints a numbered category menu, reads the user's choice,
    fetches that category with ``get_data`` and hands the decoded response to
    ``write_data``.
    """

    def __init__(self):
        # Endpoint that receives the category id as form data (see get_data).
        self.url = "http://www.xinfadi.com.cn/getCat.html"
        # Menu label -> value sent to the endpoint as ``prodCatid``.
        self.json = {"蔬菜": 1186, "水果": 1187, "肉禽蛋": 1189, "水产": 1190, "粮油": 1188, "豆制品": 1203, "调料": 1204}
        # One session object reused for every request made by this instance.
        self.session = requests.session()

    def get_data(self, num_id: int, timeout: float = 10):
        """
        POST the category id to ``self.url`` and return the decoded JSON body.

        :param num_id: category id sent as the ``prodCatid`` form field
        :param timeout: seconds to wait for the server before giving up;
            without a timeout a stalled connection would block forever
        :return: the response body parsed as JSON
        :raises requests.HTTPError: if the server answers with an error status
        """
        form = {
            "prodCatid": num_id
        }
        response = self.session.post(url=self.url, data=form, headers=headers, timeout=timeout)
        try:
            # Fail with a clear HTTP error instead of an obscure JSON decode
            # error when the server returns an error page.
            response.raise_for_status()
            return response.json()
        finally:
            # Release the connection even if the status check or JSON
            # decoding raises.
            response.close()

    def _resolve_choice(self, raw: str):
        """
        Convert the text typed at the menu prompt into ``(label, category_id)``.

        The text is parsed with ``int`` rather than ``eval`` so that user input
        is never executed as Python code. Indexing follows normal list rules,
        so negative indexes count from the end of the menu.

        :param raw: text entered by the user
        :return: tuple of the chosen menu label and its category id
        :raises ValueError: if ``raw`` is not an integer literal
        :raises IndexError: if the index is outside the menu
        """
        labels = list(self.json)
        try:
            index = int(raw)
        except ValueError:
            raise ValueError(f"expected an integer menu index, got {raw!r}") from None
        label = labels[index]
        return label, self.json[label]

    def download(self):
        """
        Show the category menu, read a choice and export it to ``<label>.xlsx``.

        :raises ValueError: if the entered text is not an integer
        :raises IndexError: if the entered index is outside the menu
        """
        for index, label in enumerate(self.json):
            print(index, label)
        num = input("输入下标:")
        label, the_id = self._resolve_choice(num)
        the_json = self.get_data(the_id)
        write_data(json=the_json, file_name=label)


if __name__ == '__main__':
    # Script entry point: show the category menu and export the chosen
    # category to an .xlsx file.
    spider = Spider()
    spider.download()

img

img

img

运行程序后,会在当前目录生成一个 .xlsx 文件,这就是你需要的数据文件。
运行前需要先安装 openpyxl 和 requests(例如:pip install openpyxl requests)。
有用的话请点一下采纳。


#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author: Roc-xb
"""
import requests
import openpyxl
import time


def write_excel(dataList, filename):
    """Write price records to ``<filename>.xlsx`` in the working directory.

    The active sheet gets a header row followed by one row per record.

    :param dataList: iterable of records; each record is indexed with the
        field names listed in ``columns`` below
    :param filename: output file name without the ``.xlsx`` extension
    """
    # Header cell text -> record key, in column order.
    columns = {
        "品名": "prodName",
        "最低价": "lowPrice",
        "平均价": "avgPrice",
        "最高价": "highPrice",
        "规格": "specInfo",
        "产地": "place",
        "单位": "unitInfo",
        "发布日期": "pubDate",
    }
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    worksheet.append(list(columns))
    record_keys = tuple(columns.values())
    for record in dataList:
        worksheet.append([record[key] for key in record_keys])
    workbook.save(f"{filename}.xlsx")


if __name__ == '__main__':
    # Script entry point: query today's price records and save them to an
    # .xlsx file named after today's date.

    # Read the clock once so the query date and the output file name always
    # refer to the same day, even when the script starts right at midnight.
    now = time.localtime()
    today = time.strftime("%Y/%m/%d", now)
    filename = time.strftime("%Y年%m月%d日", now)
    print("今日是:", today)
    url = "http://www.xinfadi.com.cn/getPriceData.html"
    # The form body is sent as a pre-built string, so the Content-Type header
    # is set by hand below. Start and end date are both today, which limits
    # the query to records published today.
    payload = "limit=1000&current=1&pubDateStartTime={}&pubDateEndTime={}&prodPcatid=&prodCatid=&prodName=".format(
        today, today)
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36'
    }
    # A timeout keeps an unattended daily run from hanging on a stalled
    # connection; raise_for_status reports HTTP errors before JSON decoding.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    response.raise_for_status()
    dataList = response.json()['list']
    print("今日查询结果条数:", len(dataList))
    write_excel(dataList, filename)

img

img

直接调用接口,处理数据,再用 xlwt 库把数据写入 Excel。

img

从页面抓包分析来看也挺简单的:直接请求页面里调用的 API,解析返回的 JSON,就拿到数据源了,剩下的就是把数据转成 Excel。

请参考:
股票和菜是一样的
https://blog.csdn.net/qq285679784/article/details/109229295