import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
# 读取含有热误差数据的CSV文件
train_file = open('data8.csv', encoding='utf-8')
train_df = pd.read_csv(train_file)
# 读取测试集的含有热误差数据的CSV文件
test_file = open('data.csv', encoding='utf-8')
test_df = pd.read_csv(test_file)
# 对训练集进行数据预处理
X_train = train_df.iloc[:, :-1]
Y_train = train_df.iloc[:, -1]
X_train_scaled = preprocessing.scale(X_train)
# 对测试集进行数据预处理
X_test = test_df.iloc[:, :-1]
Y_test = test_df.iloc[:, -1]
X_test_scaled = preprocessing.scale(X_test, with_mean=X_train_scaled.mean(axis=0)[0], with_std=X_train_scaled.mean(axis=0)[0])
X = pd.concat([test_df.iloc[:, :-1], train_df.iloc[:, :-1]], axis=1)
Y = pd.concat([test_df.iloc[:, -1], train_df.iloc[:, -1]], axis=1)
X_test.columns = X_train.columns
# 创建决策树模型
dt = DecisionTreeRegressor()
# 定义网格搜索参数
param_grid = {
'max_depth': [1,2,3,4,5,6,7,8,9],
'min_samples_split': [2, 4, 6],
'min_samples_leaf': [1, 2, 3]
}
# 进行网格搜索优化
grid = GridSearchCV(dt, param_grid, cv=5)
grid.fit(X, Y)
grid_search = GridSearchCV(DecisionTreeRegressor(), param_grid, cv=5)
grid_search.fit(X_train, Y_train)
best_model = grid_search.best_estimator_
# 输出最优参数和模型得分
print('Best Parameters:', grid.best_params_)
# 定义新的温度数据
# 输出预测结果
Y_pred = best_model.predict(X_test)
mse = mean_squared_error(Y_test, Y_pred)
print(f"MSE: {mse:.4f}")
print(Y_pred)
X_test = pd.concat([X_train, X_test], axis=0, ignore_index=True)
下面的错误怎么改
ValueError:
All the 405 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.
Below are more details about the failures:
--------------------------------------------------------------------------------
405 fits failed with the following error:
Traceback (most recent call last):
File "C:\Users\86191\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "C:\Users\86191\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 1247, in fit
super().fit(
File "C:\Users\86191\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 186, in fit
X, y = self._validate_data(
File "C:\Users\86191\anaconda3\lib\site-packages\sklearn\base.py", line 579, in _validate_data
X = check_array(X, input_name="X", **check_X_params)
File "C:\Users\86191\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 921, in check_array
_assert_all_finite(
File "C:\Users\86191\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 161, in _assert_all_finite
raise ValueError(msg_err)
ValueError: Input X contains NaN.
DecisionTreeRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values
输入的数据没有经常数据处理nan值要么剔除要么设置为均值或者0