我想简化for loop
import pandas as pd
import random
df_norms = pd.DataFrame()
df = pd.DataFrame({'Individual_Scale' : ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F'],
'Score' : [random.random()*100 for _ in range(6)]})
for scale in ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F']:
df_norms[scale] = sorted([random.random()*100 for _ in range(100)])
df = df.append(pd.Series([scale,random.random()*100], index = df.columns), ignore_index=True)
df_norms = df_norms.T.reset_index()
df_norms = df_norms.round(2)
df_norms
简化下面的for loop
def assesment_function(df,df_norms):
perc = []
for i in range(len(df)):
for j in range(len(df_norms)):
if df['Individual_Scale'][i] == df_norms.iloc[j,0]:
k=1
while df['Score'][i] >= df_norms.iloc[j,k]:
if k == 100:
break
else:
k += 1
perc.append(k-1)
df['assesment'] = perc
return df
assesment_function(df,df_norms)
最外面两层for没法简化,你就是需要三层for
但是中间的while可以简化
#原始代码
k=1
while df['Score'][i] >= df_norms.iloc[j,k]:
if k == 100:
break
else:
k += 1
perc.append(k-1)
#简化后的代码
for k in range(1,101):
if df['Score'][i] < df_norms.iloc[j,k]:
perc.append(k)
break
import pandas as pd
import random
df_norms = pd.DataFrame()
df = pd.DataFrame({'Individual_Scale' : ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F'],
'Score' : [random.random()*100 for _ in range(6)]})
for scale in ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F']:
df_norms[scale] = sorted([random.random()*100 for _ in range(100)])
d = pd.DataFrame([[scale,random.random()*100]], columns = df.columns)
df = pd.concat([df, d], ignore_index = True)
df_norms = df_norms.T.reset_index()
df_norms = df_norms.round(2)
def assesment_function1(df,df_norms):
def fun(sel):
res = df_norms[df_norms['index']==sel[0]].values.tolist()
return len([i for i in res[0][1:] if i < sel[1]])
df['assesment'] = df.apply(fun, axis = 1)
return df
res = assesment_function1(df, df_norms)
print(res)
"""
--result
Individual_Scale Score assesment
0 Scale A 59.052440 52
1 Scale B 73.205221 70
2 Scale C 37.023651 39
3 Scale D 64.912138 70
4 Scale E 83.933522 86
5 Scale F 87.561825 84
6 Scale A 27.724605 32
7 Scale B 54.371713 50
8 Scale C 29.194051 35
9 Scale D 51.997275 53
10 Scale E 40.301953 37
11 Scale F 39.797092 39
"""
给一个简化版的,一次循环就可以解决问题,起始就是按照Scale X取df中的score,再取df_norms中的对应行,计算一行中小于score的数量,在numpy中有where函数可以直接计算,不用循环
import pandas as pd
import numpy as np
import random
df_norms = pd.DataFrame()
df = pd.DataFrame({'Individual_Scale' : ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F'],'Score' : [random.random()*100 for _ in range(6)]})
for scale in ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F']:
df_norms[scale] = sorted([random.random()*100 for _ in range(100)])
data = pd.DataFrame({'Individual_Scale' : [scale],'Score':random.random()*100})
df = pd.concat([df, data], ignore_index = True)
df_norms = df_norms.T.reset_index()
df_norms = df_norms.round(2)
def assesment_function(df,df_norms):
df_norms.index = df_norms['index'] #把df_norms的index设置为索引,方便取值
perc = []
for i in range(len(df['Individual_Scale'])): #遍历df的Individual_Scale列
scale = df['Individual_Scale'][i]
score = df.iloc[i,1] #取对应df列的score
line_norms = np.array(df_norms.loc[scale][1:]) #取df_norms中对应行的数据
perc.append(len(np.where(line_norms<score)[0])) #计算行中小于score的数量
df['assesment'] = perc
return df
df1 = assesment_function(df,df_norms)
print(df1)
pandas 使用index遍历不慢才怪, 你可以使用, apply itertuples 这些遍历性能能大约提升10-50倍
甚至不用for循环
import pandas as pd
import random
import numpy as np
df_norms = pd.DataFrame()
df = pd.DataFrame({'Individual_Scale' : ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F'],
'Score' : [random.random()*100 for _ in range(6)]})
for scale in ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F']:
df_norms[scale] = sorted([random.random()*100 for _ in range(100)])
df = df.append(pd.Series([scale,random.random()*100], index = df.columns), ignore_index=True)
df_norms = df_norms.T
data_arr=list(np.array(df_norms).round(2))
df_norms = pd.DataFrame(data={'Individual_Scale':df_norms.index,'zip_data':data_arr})
df_merge=pd.merge(df,df_norms,on='Individual_Scale')
zip_series=df_merge['Score'].apply(lambda x:[x])+df_merge['zip_data'].apply(lambda x:[x])
assessment=zip_series.apply(lambda x:(x[1]<=x[0]).sum())
df_merge['assessment']=assessment
del df_merge['zip_data']
print(df_merge)
emmm前面人答的我看好一楼和三楼的,如果还有问题可以联系我作答