如图所示,怎么把pandas 读取的矩阵,转化成如下字典形式.
这个第一列alternatives 中 a1 不止是到a5,真正数据集可能有几百个,所以行数是m,另外y的数量较少,只有三到四列。所以需要一个通用的代码。
几行代码的事,
import pandas as pd
##path是你的数据文件路径,其他代码不用改
path='D:/1.csv'
df1= pd.read_csv(path)
col1=df1.columns[1:] ## 较少的Y列名
df1.index=df1.Alternatives
dic={(i,j):df1.loc[j,i] for i in col1 for j in df1.index}
print(dic) ##打印结果
有Demo数据吗
代码:
import pandas as pd
_data = {'Y1': [0, 5, 0, 1, 4],
'Y2': [5, 4, 2, 3, 1]}
_index = ['A1', 'A2', 'A3', 'A4', 'A5']
df = pd.DataFrame( _data, index=_index)
_dict = df.to_dict('dict')
print("原先字典为:\n", _dict)
result = {}
for y in _dict.keys():
x_dict = _dict[y]
for x in x_dict.keys():
value = x_dict[x]
result[(y, x)] = value
print("处理后的字典为:\n", result)
# {('Y1', 'A1'): 0, ('Y1', 'A2'): 5, ('Y1', 'A3'): 0, ('Y1', 'A4'): 1, ('Y1', 'A5'): 4, ('Y2', 'A1'): 5, ('Y2', 'A2'): 4, ('Y2', 'A3'): 2, ('Y2', 'A4'): 3, ('Y2', 'A5'): 1}
输出为:
原先字典为:
{'Y1': {'A1': 0, 'A2': 5, 'A3': 0, 'A4': 1, 'A5': 4}, 'Y2': {'A1': 5, 'A2': 4, 'A3': 2, 'A4': 3, 'A5': 1}}
处理后的字典为:
{('Y1', 'A1'): 0, ('Y1', 'A2'): 5, ('Y1', 'A3'): 0, ('Y1', 'A4'): 1, ('Y1', 'A5'): 4, ('Y2', 'A1'): 5, ('Y2', 'A2'): 4, ('Y2', 'A3'): 2, ('Y2', 'A4'): 3, ('Y2', 'A5'): 1}
如有问题及时沟通
import pandas as pd
df = pd.DataFrame({'Y1': [0,5,0,1,4], 'Y2': [5,4,2,3,1]}, index=['A1','A2','A3','A4','A5'])
a = df.to_dict('dict')
b = {}
for i, j in a.items():
for x,y in j.items():
b[(i,x)]=y
print(b)
输出:
{('Y1', 'A1'): 0, ('Y1', 'A2'): 5, ('Y1', 'A3'): 0, ('Y1', 'A4'): 1, ('Y1', 'A5'): 4, ('Y2', 'A1'): 5, ('Y2', 'A2'): 4, ('Y2', 'A3'): 2, ('Y2', 'A4'): 3, ('Y2', 'A5'): 1}
import pandas as pd
df = pd.DataFrame({'Alternatives': ['A1', 'A2', 'A3', 'A4', 'A5'],
'Y1': [0, 5, 0, 1, 4],
'Y2': [5, 4, 2, 3, 1]})
G = {}
for i in range(df.shape[1]-1):
for j in range(df.shape[0]):
G.update({(df.columns[i+1], df.iloc[j,0]):df.iloc[j,i+1]})
>>> import pandas as pd
>>> df = pd.DataFrame({'Y1': [0,5,0,1,4], 'Y2': [5,4,2,3,1]}, index=['A1','A2','A3','A4','A5'])
>>> dic = {(i,x):y for x,y in j.items() for i,j in df.to_dict('dict').items()}
>>> dic
{('Y1', 'A1'): 5, ('Y2', 'A1'): 5, ('Y1', 'A2'): 4, ('Y2', 'A2'): 4, ('Y1', 'A3'): 2, ('Y2', 'A3'): 2, ('Y1', 'A4'): 3, ('Y2', 'A4'): 3, ('Y1', 'A5'): 1, ('Y2', 'A5'): 1}
先把原来的数据转换成这样的 {0:[Y1,Y2]},然后再转换成{(Y1,Y2):0},
df = pd.DataFrame({'Y1': [0,5,0,1,4], 'Y2': [5,4,2,3,1]}, index=['A1','A2','A3','A4','A5'])
dic=df.T.stack().to_dict()
print(dic)
>>> import numpy as np
>>> import pandas as pd
>>> def df2dict(df):
result = dict()
A = df[df.columns[0]].values
for cname in df.columns[1:]:
Y = np.repeat(np.array(cname), A.shape[0])
result.update({k:v for k,v in zip(zip(Y,A), df[cname].values)})
return result
>>> data = {
'Alternatives': ['A1','A2','A3'],
'Y1': [0,1,4],
'Y2': [2,3,1],
'Y3': [5,4,2]
}
>>> df = pd.DataFrame(data)
>>> df
Alternatives Y1 Y2 Y3
0 A1 0 2 5
1 A2 1 3 4
2 A3 4 1 2
>>> df2dict(df)
{('Y1', 'A1'): 0,
('Y1', 'A2'): 1,
('Y1', 'A3'): 4,
('Y2', 'A1'): 2,
('Y2', 'A2'): 3,
('Y2', 'A3'): 1,
('Y3', 'A1'): 5,
('Y3', 'A2'): 4,
('Y3', 'A3'): 2}
demo数据文件csv
Alternatives,Y1,Y2,Y3,Y4
A1,0,5,10,15
A2,5,4,3,2
A3,10,3,-4,-11
A4,15,2,-11,-24
A5,20,1,-18,-37
A6,25,0,-25,-50
A7,30,-1,-32,-63
A8,35,-2,-39,-76
A9,40,-3,-46,-89
A10,45,-4,-53,-102
A11,50,-5,-60,-115
A12,55,-6,-67,-128
A13,60,-7,-74,-141
A14,65,-8,-81,-154
A15,70,-9,-88,-167
A16,75,-10,-95,-180
A17,80,-11,-102,-193
A18,85,-12,-109,-206
A19,90,-13,-116,-219
A20,95,-14,-123,-232
import json
import pandas as pd
# 读取数据文件 假如是csv
df = pd.read_csv('./demo_data.csv')
# 设置索引列为Ym所在列
df.set_index('Alternatives', inplace=True)
# 转化成字典
df_dict = df.to_dict()
# 定义结果字典
result = {}
# 循环第一层
for first_key, first_value in df_dict.items():
# 循环第二层
for second_key, second_value in first_value.items():
# 将第一层key和第二层key作为新key,存储第二层value
result[f"{first_key, second_key}"] = second_value
# print(first_key, second_key, second_value)
print(json.dumps(result, indent=4))
输出结果如下
{
"('Y1', 'A1')": 0,
"('Y1', 'A2')": 5,
"('Y1', 'A3')": 10,
"('Y1', 'A4')": 15,
"('Y1', 'A5')": 20,
"('Y1', 'A6')": 25,
"('Y1', 'A7')": 30,
"('Y1', 'A8')": 35,
"('Y1', 'A9')": 40,
"('Y1', 'A10')": 45,
"('Y1', 'A11')": 50,
"('Y1', 'A12')": 55,
"('Y1', 'A13')": 60,
"('Y1', 'A14')": 65,
"('Y1', 'A15')": 70,
"('Y1', 'A16')": 75,
"('Y1', 'A17')": 80,
"('Y1', 'A18')": 85,
"('Y1', 'A19')": 90,
"('Y1', 'A20')": 95,
"('Y2', 'A1')": 5,
"('Y2', 'A2')": 4,
"('Y2', 'A3')": 3,
"('Y2', 'A4')": 2,
"('Y2', 'A5')": 1,
"('Y2', 'A6')": 0,
"('Y2', 'A7')": -1,
"('Y2', 'A8')": -2,
"('Y2', 'A9')": -3,
"('Y2', 'A10')": -4,
"('Y2', 'A11')": -5,
"('Y2', 'A12')": -6,
"('Y2', 'A13')": -7,
"('Y2', 'A14')": -8,
"('Y2', 'A15')": -9,
"('Y2', 'A16')": -10,
"('Y2', 'A17')": -11,
"('Y2', 'A18')": -12,
"('Y2', 'A19')": -13,
"('Y2', 'A20')": -14,
"('Y3', 'A1')": 10,
"('Y3', 'A2')": 3,
"('Y3', 'A3')": -4,
"('Y3', 'A4')": -11,
"('Y3', 'A5')": -18,
"('Y3', 'A6')": -25,
"('Y3', 'A7')": -32,
"('Y3', 'A8')": -39,
"('Y3', 'A9')": -46,
"('Y3', 'A10')": -53,
"('Y3', 'A11')": -60,
"('Y3', 'A12')": -67,
"('Y3', 'A13')": -74,
"('Y3', 'A14')": -81,
"('Y3', 'A15')": -88,
"('Y3', 'A16')": -95,
"('Y3', 'A17')": -102,
"('Y3', 'A18')": -109,
"('Y3', 'A19')": -116,
"('Y3', 'A20')": -123,
"('Y4', 'A1')": 15,
"('Y4', 'A2')": 2,
"('Y4', 'A3')": -11,
"('Y4', 'A4')": -24,
"('Y4', 'A5')": -37,
"('Y4', 'A6')": -50,
"('Y4', 'A7')": -63,
"('Y4', 'A8')": -76,
"('Y4', 'A9')": -89,
"('Y4', 'A10')": -102,
"('Y4', 'A11')": -115,
"('Y4', 'A12')": -128,
"('Y4', 'A13')": -141,
"('Y4', 'A14')": -154,
"('Y4', 'A15')": -167,
"('Y4', 'A16')": -180,
"('Y4', 'A17')": -193,
"('Y4', 'A18')": -206,
"('Y4', 'A19')": -219,
"('Y4', 'A20')": -232
}
import pandas as pd
##df1 可以通过csv 或者 excel 读进来
df1= pd.DataFrame({'Alternatives': ['A1', 'A2', 'A3', 'A4', 'A5'],
'Y1': [0, 5, 0, 1, 4],
'Y2': [5, 4, 2, 3, 1]})
col1=['Y1','Y2'] ## 较少的Y列名
col2=df1.Alternatives ##任意长的 A1-An
dic={}
for i in col1:
for j in col2:
k=(i,j) ##行列组成的字典key
v=df1[df1.Alternatives==j][i].values[0] ##字典值
dic[k]=v
print(dic) ##打印结果
a=pd.DataFrame({"Y1":[1,2,0],"Y2":[0,9,6]},index=["A1","A2","A3"])
r=dict()
for k,v in a.to_dict().items():
for k_,v_ in v.items():
r[(k,k_)]=v_
print(r)