缺失值填充出现错误
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
#2.读取数据
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
data=pd.read_excel('train_data.xlsx')
#print(data)
#print(np.any(data.isnull()))
#3.数据预处理
data=data.drop(labels=['机器编号','统一规范代码','具体故障类别'],axis=1)#删除不需要的列
data.replace('L',1.0,inplace=True)
data.replace('M',2.0,inplace=True)
data.replace('H',3.0,inplace=True)
data=data.dropna()
#print(data)
X = data.iloc[:,0:-1]
y = data.iloc[:,-1]
#4、分割测试集和训练集
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.33,random_state=40)
#5、选择模型
model = DecisionTreeClassifier(max_depth=6)
#6、训练模型
model.fit(X_train,y_train)
#7、评价模型:要求用F1
#y_predict = model.predict(X_test)
#score = f1_score(y_predict,y_test,average="macro")
#plt.show()
y_predict = model.predict(X_test)
score = f1_score(y_predict,y_test,average="macro")
print(score)
# 缺失值填充
forecast=pd.read_excel("test_data.xlsx")
forecast=forecast.drop(labels=['机器编号','统一规范代码'],axis=1) #删除不需要的列
forecast.replace('L',1.0,inplace=True)
forecast.replace('M',2.0,inplace=True)
forecast.replace('H',3.0,inplace=True)
y_predict=model.predict(forecast)
print(y_predict)
score=f1_score(y_predict,y_test.iloc[:,:-2],average='macro')
#
forecast.insert(loc=len(forecast.columns),column='是否发生故障',value=y_predict)
forecast.to_excel('test_data.xlsx')
报错如下:
数据集test_data.xlsx如下,缺失后两列,要求填充缺失值
真的心累,想帮你连train_data.xlsx和test_data.xlsx都不发出来,有心无力。。。