python 中用 pandas读取的矩阵转成字典形式

如图所示,怎么把pandas 读取的矩阵,转化成如下字典形式.

img

这个第一列alternatives 中 a1 不止是到a5,真正数据集可能有几百个,所以行数是m,另外y的数量较少,只有三到四列。所以需要一个通用的代码。

几行代码的事,

import pandas as pd
##path是你的数据文件路径,其他代码不用改
path='D:/1.csv'
df1= pd.read_csv(path)


col1=df1.columns[1:]  ## 较少的Y列名
df1.index=df1.Alternatives
dic={(i,j):df1.loc[j,i] for i in col1 for j in df1.index}
print(dic)  ##打印结果


有Demo数据吗


代码:

import pandas as pd

_data = {'Y1': [0, 5, 0, 1, 4],
         'Y2': [5, 4, 2, 3, 1]}
_index = ['A1', 'A2', 'A3', 'A4', 'A5']
df = pd.DataFrame( _data, index=_index)

_dict = df.to_dict('dict')
print("原先字典为:\n", _dict)
result = {}
for y in _dict.keys():
    x_dict = _dict[y]
    for x in x_dict.keys():
        value = x_dict[x]
        result[(y, x)] = value
print("处理后的字典为:\n", result)
# {('Y1', 'A1'): 0, ('Y1', 'A2'): 5, ('Y1', 'A3'): 0, ('Y1', 'A4'): 1, ('Y1', 'A5'): 4, ('Y2', 'A1'): 5, ('Y2', 'A2'): 4, ('Y2', 'A3'): 2, ('Y2', 'A4'): 3, ('Y2', 'A5'): 1}

输出为

原先字典为:
 {'Y1': {'A1': 0, 'A2': 5, 'A3': 0, 'A4': 1, 'A5': 4}, 'Y2': {'A1': 5, 'A2': 4, 'A3': 2, 'A4': 3, 'A5': 1}}
处理后的字典为:
 {('Y1', 'A1'): 0, ('Y1', 'A2'): 5, ('Y1', 'A3'): 0, ('Y1', 'A4'): 1, ('Y1', 'A5'): 4, ('Y2', 'A1'): 5, ('Y2', 'A2'): 4, ('Y2', 'A3'): 2, ('Y2', 'A4'): 3, ('Y2', 'A5'): 1}

如有问题及时沟通

import pandas as pd

df = pd.DataFrame({'Y1': [0,5,0,1,4], 'Y2': [5,4,2,3,1]}, index=['A1','A2','A3','A4','A5'])

a = df.to_dict('dict')
b = {}
for i, j in a.items():
    for x,y in j.items():
        b[(i,x)]=y

print(b)

输出:

{('Y1', 'A1'): 0, ('Y1', 'A2'): 5, ('Y1', 'A3'): 0, ('Y1', 'A4'): 1, ('Y1', 'A5'): 4, ('Y2', 'A1'): 5, ('Y2', 'A2'): 4, ('Y2', 'A3'): 2, ('Y2', 'A4'): 3, ('Y2', 'A5'): 1}
import pandas as pd
df = pd.DataFrame({'Alternatives': ['A1', 'A2', 'A3', 'A4', 'A5'],
                  'Y1': [0, 5, 0, 1, 4],
                  'Y2': [5, 4, 2, 3, 1]})
G = {}
for i in range(df.shape[1]-1):
    for j in range(df.shape[0]):
        G.update({(df.columns[i+1], df.iloc[j,0]):df.iloc[j,i+1]})

img

>>> import pandas as pd
>>> df = pd.DataFrame({'Y1': [0,5,0,1,4], 'Y2': [5,4,2,3,1]}, index=['A1','A2','A3','A4','A5'])
>>> dic = {(i,x):y for x,y in j.items() for i,j in df.to_dict('dict').items()}
>>> dic
{('Y1', 'A1'): 5, ('Y2', 'A1'): 5, ('Y1', 'A2'): 4, ('Y2', 'A2'): 4, ('Y1', 'A3'): 2, ('Y2', 'A3'): 2, ('Y1', 'A4'): 3, ('Y2', 'A4'): 3, ('Y1', 'A5'): 1, ('Y2', 'A5'): 1}

先把原来的数据转换成这样的 {0:[Y1,Y2]},然后再转换成{(Y1,Y2):0},


df = pd.DataFrame({'Y1': [0,5,0,1,4], 'Y2': [5,4,2,3,1]}, index=['A1','A2','A3','A4','A5'])
dic=df.T.stack().to_dict()
print(dic)
>>> import numpy as np
>>> import pandas as pd
>>> def df2dict(df):
    result = dict()
    A = df[df.columns[0]].values
    for cname in df.columns[1:]:
        Y = np.repeat(np.array(cname), A.shape[0])
        result.update({k:v for k,v in zip(zip(Y,A), df[cname].values)})
    return result

>>> data = {
    'Alternatives': ['A1','A2','A3'],
    'Y1': [0,1,4],
    'Y2': [2,3,1],
    'Y3': [5,4,2]
}
>>> df = pd.DataFrame(data)
>>> df
  Alternatives  Y1  Y2  Y3
0           A1   0   2   5
1           A2   1   3   4
2           A3   4   1   2
>>> df2dict(df)
{('Y1', 'A1'): 0, 
('Y1', 'A2'): 1, 
('Y1', 'A3'): 4, 
('Y2', 'A1'): 2, 
('Y2', 'A2'): 3, 
('Y2', 'A3'): 1, 
('Y3', 'A1'): 5, 
('Y3', 'A2'): 4, 
('Y3', 'A3'): 2}

demo数据文件csv

Alternatives,Y1,Y2,Y3,Y4
A1,0,5,10,15
A2,5,4,3,2
A3,10,3,-4,-11
A4,15,2,-11,-24
A5,20,1,-18,-37
A6,25,0,-25,-50
A7,30,-1,-32,-63
A8,35,-2,-39,-76
A9,40,-3,-46,-89
A10,45,-4,-53,-102
A11,50,-5,-60,-115
A12,55,-6,-67,-128
A13,60,-7,-74,-141
A14,65,-8,-81,-154
A15,70,-9,-88,-167
A16,75,-10,-95,-180
A17,80,-11,-102,-193
A18,85,-12,-109,-206
A19,90,-13,-116,-219
A20,95,-14,-123,-232

import json

import pandas as pd

# 读取数据文件 假如是csv
df = pd.read_csv('./demo_data.csv')

# 设置索引列为Ym所在列
df.set_index('Alternatives', inplace=True)

# 转化成字典
df_dict = df.to_dict()

# 定义结果字典
result = {}

# 循环第一层
for first_key, first_value in df_dict.items():
    # 循环第二层
    for second_key, second_value in first_value.items():
        # 将第一层key和第二层key作为新key,存储第二层value
        result[f"{first_key, second_key}"] = second_value
        # print(first_key, second_key, second_value)

print(json.dumps(result, indent=4))

输出结果如下

{
    "('Y1', 'A1')": 0,
    "('Y1', 'A2')": 5,
    "('Y1', 'A3')": 10,
    "('Y1', 'A4')": 15,
    "('Y1', 'A5')": 20,
    "('Y1', 'A6')": 25,
    "('Y1', 'A7')": 30,
    "('Y1', 'A8')": 35,
    "('Y1', 'A9')": 40,
    "('Y1', 'A10')": 45,
    "('Y1', 'A11')": 50,
    "('Y1', 'A12')": 55,
    "('Y1', 'A13')": 60,
    "('Y1', 'A14')": 65,
    "('Y1', 'A15')": 70,
    "('Y1', 'A16')": 75,
    "('Y1', 'A17')": 80,
    "('Y1', 'A18')": 85,
    "('Y1', 'A19')": 90,
    "('Y1', 'A20')": 95,
    "('Y2', 'A1')": 5,
    "('Y2', 'A2')": 4,
    "('Y2', 'A3')": 3,
    "('Y2', 'A4')": 2,
    "('Y2', 'A5')": 1,
    "('Y2', 'A6')": 0,
    "('Y2', 'A7')": -1,
    "('Y2', 'A8')": -2,
    "('Y2', 'A9')": -3,
    "('Y2', 'A10')": -4,
    "('Y2', 'A11')": -5,
    "('Y2', 'A12')": -6,
    "('Y2', 'A13')": -7,
    "('Y2', 'A14')": -8,
    "('Y2', 'A15')": -9,
    "('Y2', 'A16')": -10,
    "('Y2', 'A17')": -11,
    "('Y2', 'A18')": -12,
    "('Y2', 'A19')": -13,
    "('Y2', 'A20')": -14,
    "('Y3', 'A1')": 10,
    "('Y3', 'A2')": 3,
    "('Y3', 'A3')": -4,
    "('Y3', 'A4')": -11,
    "('Y3', 'A5')": -18,
    "('Y3', 'A6')": -25,
    "('Y3', 'A7')": -32,
    "('Y3', 'A8')": -39,
    "('Y3', 'A9')": -46,
    "('Y3', 'A10')": -53,
    "('Y3', 'A11')": -60,
    "('Y3', 'A12')": -67,
    "('Y3', 'A13')": -74,
    "('Y3', 'A14')": -81,
    "('Y3', 'A15')": -88,
    "('Y3', 'A16')": -95,
    "('Y3', 'A17')": -102,
    "('Y3', 'A18')": -109,
    "('Y3', 'A19')": -116,
    "('Y3', 'A20')": -123,
    "('Y4', 'A1')": 15,
    "('Y4', 'A2')": 2,
    "('Y4', 'A3')": -11,
    "('Y4', 'A4')": -24,
    "('Y4', 'A5')": -37,
    "('Y4', 'A6')": -50,
    "('Y4', 'A7')": -63,
    "('Y4', 'A8')": -76,
    "('Y4', 'A9')": -89,
    "('Y4', 'A10')": -102,
    "('Y4', 'A11')": -115,
    "('Y4', 'A12')": -128,
    "('Y4', 'A13')": -141,
    "('Y4', 'A14')": -154,
    "('Y4', 'A15')": -167,
    "('Y4', 'A16')": -180,
    "('Y4', 'A17')": -193,
    "('Y4', 'A18')": -206,
    "('Y4', 'A19')": -219,
    "('Y4', 'A20')": -232
}




import pandas as pd
##df1 可以通过csv 或者 excel 读进来 
df1= pd.DataFrame({'Alternatives': ['A1', 'A2', 'A3', 'A4', 'A5'],
                  'Y1': [0, 5, 0, 1, 4],
                  'Y2': [5, 4, 2, 3, 1]})

col1=['Y1','Y2']  ## 较少的Y列名
col2=df1.Alternatives  ##任意长的 A1-An

dic={}
for i in col1:
    for j in col2:
        k=(i,j)  ##行列组成的字典key
        v=df1[df1.Alternatives==j][i].values[0]  ##字典值
        dic[k]=v

print(dic)  ##打印结果

a=pd.DataFrame({"Y1":[1,2,0],"Y2":[0,9,6]},index=["A1","A2","A3"])
    r=dict()
    for  k,v in a.to_dict().items():
        for  k_,v_ in v.items():
            r[(k,k_)]=v_
    print(r)