python简化for loop

问题遇到的现象和发生背景

我想简化for loop

问题相关代码,请勿粘贴截图

import pandas as pd
import random

df_norms = pd.DataFrame()
df = pd.DataFrame({'Individual_Scale' : ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F'],
'Score' : [random.random()*100 for _ in range(6)]})

for scale in ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F']:
df_norms[scale] = sorted([random.random()*100 for _ in range(100)])
df = df.append(pd.Series([scale,random.random()*100], index = df.columns), ignore_index=True)

df_norms = df_norms.T.reset_index()
df_norms = df_norms.round(2)
df_norms

简化下面的for loop
def assesment_function(df,df_norms):
perc = []

for i in range(len(df)):
    for j in range(len(df_norms)):
        if df['Individual_Scale'][i] == df_norms.iloc[j,0]:
            k=1
            while df['Score'][i] >= df_norms.iloc[j,k]:
                if k == 100:
                    break
                else:
                    k += 1
            perc.append(k-1)
df['assesment'] = perc
return df

assesment_function(df,df_norms)

我想要达到的结果

img

最外面两层for没法简化,你就是需要三层for
但是中间的while可以简化

#原始代码
k=1
while df['Score'][i] >= df_norms.iloc[j,k]:
    if k == 100:
        break
    else:
        k += 1
perc.append(k-1)

#简化后的代码
for k in range(1,101):
    if df['Score'][i] < df_norms.iloc[j,k]:
        perc.append(k)
        break
import pandas as pd
import random

df_norms = pd.DataFrame()
df = pd.DataFrame({'Individual_Scale' : ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F'],
'Score' : [random.random()*100 for _ in range(6)]})
for scale in ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F']:
    df_norms[scale] = sorted([random.random()*100 for _ in range(100)])
    d = pd.DataFrame([[scale,random.random()*100]], columns = df.columns)  
    df = pd.concat([df, d], ignore_index = True)
df_norms = df_norms.T.reset_index()
df_norms = df_norms.round(2)    

def assesment_function1(df,df_norms):
    def fun(sel):
        res = df_norms[df_norms['index']==sel[0]].values.tolist()
        return len([i for i in res[0][1:] if i < sel[1]])
    df['assesment'] = df.apply(fun, axis = 1)
    return df
res = assesment_function1(df, df_norms)
print(res)


"""
--result
   Individual_Scale      Score  assesment
0           Scale A  59.052440         52
1           Scale B  73.205221         70
2           Scale C  37.023651         39
3           Scale D  64.912138         70
4           Scale E  83.933522         86
5           Scale F  87.561825         84
6           Scale A  27.724605         32
7           Scale B  54.371713         50
8           Scale C  29.194051         35
9           Scale D  51.997275         53
10          Scale E  40.301953         37
11          Scale F  39.797092         39
"""

给一个简化版的,一次循环就可以解决问题,起始就是按照Scale X取df中的score,再取df_norms中的对应行,计算一行中小于score的数量,在numpy中有where函数可以直接计算,不用循环

import pandas as pd
import numpy as np
import random

df_norms = pd.DataFrame()
df = pd.DataFrame({'Individual_Scale' : ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F'],'Score' : [random.random()*100 for _ in range(6)]})

for scale in ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F']:
    df_norms[scale] = sorted([random.random()*100 for _ in range(100)])
    data = pd.DataFrame({'Individual_Scale' : [scale],'Score':random.random()*100})
    df = pd.concat([df, data], ignore_index = True)

df_norms = df_norms.T.reset_index()
df_norms = df_norms.round(2)

def assesment_function(df,df_norms):
    df_norms.index = df_norms['index'] #把df_norms的index设置为索引,方便取值
    perc = []
    for i in range(len(df['Individual_Scale'])): #遍历df的Individual_Scale列
        scale = df['Individual_Scale'][i] 
        score = df.iloc[i,1] #取对应df列的score
        line_norms = np.array(df_norms.loc[scale][1:]) #取df_norms中对应行的数据
        perc.append(len(np.where(line_norms<score)[0])) #计算行中小于score的数量
    df['assesment'] = perc
    return df
df1 = assesment_function(df,df_norms)
print(df1)

pandas 使用index遍历不慢才怪, 你可以使用, apply itertuples 这些遍历性能能大约提升10-50倍

甚至不用for循环

import pandas as pd
import random
import numpy as np

df_norms = pd.DataFrame()
df = pd.DataFrame({'Individual_Scale' : ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F'],
'Score' : [random.random()*100 for _ in range(6)]})

for scale in ['Scale A','Scale B','Scale C','Scale D','Scale E','Scale F']:
    df_norms[scale] = sorted([random.random()*100 for _ in range(100)])
    df = df.append(pd.Series([scale,random.random()*100], index = df.columns), ignore_index=True)

df_norms = df_norms.T
data_arr=list(np.array(df_norms).round(2))
df_norms = pd.DataFrame(data={'Individual_Scale':df_norms.index,'zip_data':data_arr})

df_merge=pd.merge(df,df_norms,on='Individual_Scale')
zip_series=df_merge['Score'].apply(lambda x:[x])+df_merge['zip_data'].apply(lambda x:[x])
assessment=zip_series.apply(lambda x:(x[1]<=x[0]).sum())
df_merge['assessment']=assessment
del df_merge['zip_data']
print(df_merge)

emmm前面人答的我看好一楼和三楼的,如果还有问题可以联系我作答