请问有人知道为啥我的数据全为空吗?

1这是爬取英雄联盟所有英雄和技能,并保存为json格式


from selenium import webdriver
from lxml import etree
import requests, json

driver = webdriver.Chrome()
base_url = 'https://lol.qq.com/data/info-heros.shtml'
driver.get(base_url)
html = etree.HTML(driver.page_source)
# //*[@id="jSearchHeroDiv"]/li[1]/a
# //*[@id="jSearchHeroDiv"]/li[2]/a
hero_url_list = html.xpath('.//ul[@id="jSearchHeroDiv"]/li/a/@href')
hero_list = []  # 存放所有英雄的列表
for hero_url in hero_url_list:
    id = hero_url.split('=')[-1]
    # print(id)
    detail_url = 'https://game.gtimg.cn/images/lol/act/img/js/hero/' + id + '.js'

    # print(detail_url)
    headers = {
        'Referer': 'https://lol.qq.com/data/info-defail.shtml?id =4',
        'Sec-Fetch-Mode': 'cors',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
    }
    response = requests.get(detail_url, headers=headers)
    n = json.loads(response.text)
    # print(n)
    hero = []  # 存放单个英雄
    item_name = {}
    item_name['英雄名字'] = n['hero']['name'] + ' ' + n['hero']['title']
    hero.append(item_name)
    for i in n['spells']:  # 技能
        item_skill = {}
        item_skill['技能名字'] = i['name']
        item_skill['技能描述'] = i['description']
        hero.append(item_skill)
    hero_list.append(hero)
    # print(hero_list)
with open('hero.json', 'w') as file:
    json.dump(hero_list, file)
# 将json解码后打印
with open('hero.json', 'r') as file:
    hero_list = json.load(file)
    for heroo in hero_list:
        print(heroo)

2这是将json格式的数据读取出来并保存到excel中

import pandas as pd
import json

columns = ['英雄', '技能1', '被动', '技能2', '技能3', '技能4']
df = []
with open('hero.json', 'r', encoding='utf-8') as f:
    data_dict = json.load(f)
    for key in data_dict[0]:
        df.append(key)
print(df)
df = pd.read_json('hero.json', orient='records', encoding='utf-8')  # 读取json数据
print(df)
d = pd.DataFrame(df, columns=columns)
d.to_excel('英雄联盟英雄详情.xlsx', index=False)
print(d)

以上为两个.py文件
但是运行第二个程序后,print(d)的结果中间是nan(空),请问在座的各位有人懂这方面的知识吗?

下面取其中一种就行,read_json是已经处理过的,直接可以to_excel。read_json指定orient='records',json数据源格式列名称不对,要统一一下

img
来源: https://blog.csdn.net/qq_41562377/article/details/90203805 https://blog.csdn.net/qq_41562377/article/details/90203805

效果如下,有帮助或启发麻烦点个采纳【本回答右上角】,谢谢~~

img

img


import pandas as pd
import json
columns = ['英雄', '技能1', '被动', '技能2', '技能3', '技能4']
df = []
with open('hero.json', 'r', encoding='utf-8') as f:
    df = json.load(f)
d = pd.DataFrame(df, columns=columns)
d.to_excel('英雄联盟英雄详情.xlsx', index=False)
print(d)

或者

import pandas as pd
df = pd.read_json('hero.json', orient='records', encoding='utf-8')  # 读取json数据
df.to_excel('英雄联盟英雄详情.xlsx', index=False)
print(df)

从json中读取到数据后,由于是一个字典列表,想要获取每列字典值的话,需要用apply函数对每列进行处理。写入excel的函数这样写就可以了:

import pandas as pd
import json

df = pd.read_json('hero.json', orient='records', encoding='utf-8')  # 读取json数据 
df.columns = ['英雄', '技能1', '被动', '技能2', '技能3', '技能4']
for x in df.columns:
    df[x]=df[x].apply(lambda x:''.join(list(x.values())))
df.to_excel('hero.xlsx',index=False)

如果解答对你有帮助,请点击我回答的右上方采纳按钮给予采纳,谢谢~~