import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
# 读取含有热误差数据的CSV文件
train_file = open('train_data.csv', encoding='utf-8')
train_df = pd.read_csv(train_file)
# 读取测试集的含有热误差数据的CSV文件
test_file = open('test_data.csv', encoding='utf-8')
test_df = pd.read_csv(test_file)
# 对训练集进行数据预处理
X_train = train_df.iloc[:, :-1]
Y_train = train_df.iloc[:, -1]
X_train_scaled = preprocessing.scale(X_train)
# 对测试集进行数据预处理
X_test = test_df.iloc[:, :-1]
Y_test = test_df.iloc[:, -1]
X_test_scaled = preprocessing.scale(X_test, with_mean=X_train_scaled.mean(axis=0)[0],
with_std = X_train_scaled.mean(axis=0)[0])
with_std=X_train_scaled.mean(axis=0))
X = X_test + X_train
Y = Y_test + Y_train
# 创建决策树模型
dt = DecisionTreeRegressor()
# 定义网格搜索参数
param_grid = {
'max_depth': [1,2,3,4,5,6,7,8,9],
'min_samples_split': [2, 4, 6],
'min_samples_leaf': [1, 2, 3]
}
# 进行网格搜索优化
grid = GridSearchCV(dt, param_grid, cv=5)
grid.fit(X, Y)
grid_search = GridSearchCV(DecisionTreeRegressor(), param_grid, cv=5)
grid_search.fit(X_train, Y_train)
best_model = grid_search.best_estimator_
# 输出最优参数和模型得分
print('Best Parameters:', grid.best_params_)
# 定义新的温度数据
# 输出预测结果
Y_pred = best_model.predict(X_test)
mse = mean_squared_error(Y_test, Y_pred)
print(f"MSE: {mse:.4f}")
下面的问题怎么解决
runfile('C:/Users/86191/.spyder-py3/untitled6.py', wdir='C:/Users/86191/.spyder-py3')
File :32
with_std=X_train_scaled.mean(axis=0))
^
IndentationError: unexpected indent
X_test_scaled = preprocessing.scale(X_test, with_mean=X_train_scaled.mean(axis=0)[0],
with_std = X_train_scaled.mean(axis=0)[0])
with_std=X_train_scaled.mean(axis=0))
24行代码是干啥呢?
最小编辑距离是一种经典的距离计算方法,用来度量字符串之间的差异。它认为,将字符串A不断修改(增删改)、直至成为字符串B,所需要的修改次数代表了字符串A和B的差异大小。当然了,将A修改为B的方案非常多,选哪一种呢?我们可以用动态规划找到修改次数最小的方案,然后用对应的次数来表示A和B的距离。