import pandas as pd
from numpy import nan as NA
# Subject columns of the score table.
columns = ['chinese', 'math', 'english']
# Five raw score rows; NA marks a missing score. Rows 1 and 2 are duplicates
# and row 3 is entirely missing.
data = pd.DataFrame(
    [
        [81.5, 76.5, 73.5],
        [71, 68, NA],
        [71., 68, NA],
        [NA, NA, NA],
        [NA, 96.5, 93.5],
    ],
    columns=columns,
)
# Bare expression: shows the frame when run as the last line of a notebook cell.
data
1、保留non-na值>=2的行,删除重复行,将处理后的结果赋值给data_new
2、应用 'chinese'、'english' 各列的均值填充剩下的 NA
chinese math english
0 81.50 76.5 73.5
1 71.00 68.0 83.5
4 76.25 96.5 93.5
import pandas as pd
import numpy as np
columns = ['chinese', 'math', 'english']
data = pd.DataFrame(
    [
        [81.5, 76.5, 73.5],
        [71, 68, np.nan],
        [71, 68, np.nan],
        [np.nan, np.nan, np.nan],
        [np.nan, 96.5, 93.5],
    ],
    columns=columns,
)

# 1. Keep rows with at least two non-NA values, drop duplicate rows, and
#    store the result in data_new.
data_new = data.dropna(thresh=2).drop_duplicates()

# 2. Fill the remaining NA values in 'chinese' and 'english' with the mean of
#    the respective column.
# The result is assigned back rather than calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated and does not
# update data_new when pandas Copy-on-Write is active.
fill_values = {col: data_new[col].mean() for col in ('chinese', 'english')}
data_new = data_new.fillna(value=fill_values)

# Print the processed result.
print(data_new)
import pandas as pd
import numpy as np
columns = ['chinese', 'math', 'english']
data = pd.DataFrame(
    [
        [81.5, 76.5, 73.5],
        [71, 68, np.nan],
        [71, 68, np.nan],
        [np.nan, np.nan, np.nan],
        [np.nan, 96.5, 93.5],
    ],
    columns=columns,
)

# 1. Keep rows with at least two non-NA values, drop duplicate rows, and
#    store the result in data_new.
data_new = data.dropna(thresh=2).drop_duplicates()

# 2. Fill the remaining NA values in 'chinese' and 'english' with the mean of
#    the respective column.
# The result is assigned back rather than calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated and does not
# update data_new when pandas Copy-on-Write is active.
fill_values = {col: data_new[col].mean() for col in ('chinese', 'english')}
data_new = data_new.fillna(value=fill_values)

# Average of the three subject scores per student, rounded to two decimals.
data_new['mean'] = data_new[columns].mean(axis=1).round(2)
# Assign a grade label.
def setLevel(v: float) -> str:
    """Map an average score to a grade label.

    Args:
        v: The score to classify.

    Returns:
        'failed' for v < 60, 'median' for 60 <= v < 75, 'great' for
        75 <= v < 85, and 'excellent' otherwise.

    Note:
        A NaN input fails every comparison and therefore falls through to
        'excellent'.
    """
    # Each guard only needs the upper bound: smaller scores have already
    # returned by the time a later guard is reached.
    if v < 60:
        return 'failed'
    if v < 75:
        return 'median'
    if v < 85:
        return 'great'
    return 'excellent'
# Grade every student from the average score and store the labels in the
# "Level" column.
level_labels = data_new["mean"].apply(setLevel)
data_new["Level"] = level_labels
# Show the final table.
print(data_new)
1、将练习3中处理后的数据,计算三位学生三科的平均分,并将平均分添加在 data_new 的 mean 列中
2、将平均分 data_new['mean'] 按照“mean<60 → failed,60≤mean<75 → median,75≤mean<85 → great,85≤mean → excellent”的次序,确定三位学生的评级,
一起的这两个问题