import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
# Load the dataset and preprocess it.
df = pd.read_csv('world_bank_data.csv')
# Fill gaps in the target column by interpolating between neighbouring rows.
# NOTE(review): interpolation follows row order, so it may blend values from
# adjacent countries — confirm the CSV is grouped by country and sorted by year.
df['GDP Growth Rate'] = df['GDP Growth Rate'].interpolate()
le = LabelEncoder()
df['Country Code'] = le.fit_transform(df['Country Name'])
df.dropna(inplace=True)
X = df[['Country Code', 'Year']].values
y = df['GDP Growth Rate'].values
# MinMaxScaler works on 2-D input, so the target becomes a single column.
scaler = MinMaxScaler()
y = scaler.fit_transform(y.reshape(-1, 1))

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build and train the model.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Visualise actual vs. predicted values, one pair of lines per country.
plt.figure(figsize=(12, 6))
for code, country in enumerate(le.classes_):
    country_idx = np.where(X_test[:, 0] == code)[0]
    if country_idx.size == 0:
        # This country has no rows in the test split.
        continue
    # train_test_split shuffles rows; order by year so the line runs left to right.
    country_idx = country_idx[np.argsort(X_test[country_idx, 1])]
    years = X_test[country_idx, 1]
    actual = scaler.inverse_transform(y_test[country_idx]).ravel()
    predicted = scaler.inverse_transform(y_pred[country_idx]).ravel()
    actual_line, = plt.plot(years, actual, label=country)
    # Reuse the colour of the actual line so the dashed prediction is visually paired with it.
    plt.plot(years, predicted, '--', color=actual_line.get_color(),
             label=f'{country} Predicted')
plt.xlabel('Year')
plt.ylabel('GDP Growth Rate')
plt.title('GDP Growth Rate Prediction')
# Let the legend use the per-line labels; a hard-coded labels=[...] list would
# override them and name only the first two lines.
plt.legend(loc='best', scatterpoints=1)
plt.show()
看起来你在绘制回归图像的时候出现了一个错误。在if语句中,你只针对一个国家绘制了回归线,而且缺少了对回归线的标签。建议你调整代码如下:
# Plot actual vs. predicted GDP growth, one labelled pair of lines per country.
plt.figure(figsize=(12, 6))
for code, country in enumerate(le.classes_):
    country_idx = np.where(X_test[:, 0] == code)[0]
    if country_idx.size == 0:
        # This country has no rows in the test split.
        continue
    # The test rows are in shuffled order; sort by year so the line does not zigzag.
    country_idx = country_idx[np.argsort(X_test[country_idx, 1])]
    years = X_test[country_idx, 1]
    actual = scaler.inverse_transform(y_test[country_idx]).ravel()
    predicted = scaler.inverse_transform(y_pred[country_idx]).ravel()
    actual_line, = plt.plot(years, actual, label=country)
    # Dashed, same colour as the actual line, so the two are easy to pair up.
    plt.plot(years, predicted, '--', color=actual_line.get_color(),
             label=f'{country} Predicted')
plt.xlabel('Year')
plt.ylabel('GDP Growth Rate')
plt.title('GDP Growth Rate Prediction')
plt.legend(loc='best', scatterpoints=1)
plt.show()
这样修改后,你会得到带有标签的回归图像。
不知道你这个问题是否已经解决,如果还没有解决的话,可以参考下面这段代码:
# 在一组λ上测试结果
def ridgeTest(xArr, yArr):
    """Run ridge regression over a sweep of 30 λ values and collect the weights.

    The targets are centred on their mean, each feature column is centred and
    divided by its variance, and ``ridgeRegres`` is then called once per λ,
    where λ = exp(i - 10) for i = 0..29.

    Args:
        xArr: Feature data; converted to a matrix with ``np.asmatrix``.
        yArr: Target values; converted to a matrix and transposed into a column.

    Returns:
        A ``(30, n_features)`` array whose row ``i`` holds the weights returned
        by ``ridgeRegres`` for λ = exp(i - 10).
    """
    xMat = np.asmatrix(xArr)
    # asmatrix(yArr) yields a row matrix; transpose to get the column the solver needs.
    yMat = np.asmatrix(yArr).T

    # Centre the targets so no intercept term (X0) is required.
    yMat = yMat - np.mean(yMat, 0)

    # Per-column mean and variance used to standardise the features.
    xMeans = np.mean(xMat, 0)
    xVar = np.var(xMat, 0)

    numTestPts = 30
    wMat = np.zeros((numTestPts, np.shape(xMat)[1]))

    # xVar may contain zeros. Silence divide/invalid warnings only for this
    # computation instead of changing NumPy's global error state for the
    # whole process, as np.seterr would.
    with np.errstate(divide='ignore', invalid='ignore'):
        # Standardise features: (feature - mean) / variance.
        xMat = (xMat - xMeans) / xVar
        for i in range(numTestPts):
            # λ grows exponentially: exp(i - 10).
            # NOTE(review): ridgeRegres is defined elsewhere; presumably it
            # returns a column of weights — confirm.
            ws = ridgeRegres(xMat, yMat, np.exp(i - 10))
            wMat[i, :] = ws.T
    # One row of regression coefficients per λ.
    return wMat
把 for 循环改成下面这样:
# Draw one solid (actual) and one dashed (predicted) line per country in the test split.
for code, country in enumerate(le.classes_):
    country_idx = np.where(X_test[:, 0] == code)[0]
    if country_idx.size == 0:
        # This country has no rows in the test split.
        continue
    # The test rows are in shuffled order; sort by year so the line does not zigzag.
    country_idx = country_idx[np.argsort(X_test[country_idx, 1])]
    years = X_test[country_idx, 1]
    actual_line, = plt.plot(years, scaler.inverse_transform(y_test[country_idx]).ravel(),
                            label=country)
    # Same colour as the actual line so the prediction is visually paired with it.
    plt.plot(years, scaler.inverse_transform(y_pred[country_idx]).ravel(), '--',
             color=actual_line.get_color())
修改后的完整代码如下:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
# Load the dataset and preprocess it.
df = pd.read_csv('world_bank_data.csv')
# Fill gaps in the target column by interpolating between neighbouring rows.
# NOTE(review): interpolation follows row order, so it may blend values from
# adjacent countries — confirm the CSV is grouped by country and sorted by year.
df['GDP Growth Rate'] = df['GDP Growth Rate'].interpolate()
le = LabelEncoder()
df['Country Code'] = le.fit_transform(df['Country Name'])
df.dropna(inplace=True)
X = df[['Country Code', 'Year']].values
y = df['GDP Growth Rate'].values
# MinMaxScaler works on 2-D input, so the target becomes a single column.
scaler = MinMaxScaler()
y = scaler.fit_transform(y.reshape(-1, 1))

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build and train the model.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Visualise actual (solid) vs. predicted (dashed) values per country.
plt.figure(figsize=(12, 6))
for code, country in enumerate(le.classes_):
    country_idx = np.where(X_test[:, 0] == code)[0]
    if country_idx.size == 0:
        # This country has no rows in the test split.
        continue
    # train_test_split shuffles rows; order by year so the line runs left to right.
    country_idx = country_idx[np.argsort(X_test[country_idx, 1])]
    years = X_test[country_idx, 1]
    actual = scaler.inverse_transform(y_test[country_idx]).ravel()
    predicted = scaler.inverse_transform(y_pred[country_idx]).ravel()
    actual_line, = plt.plot(years, actual, label=country)
    # Unlabelled on purpose: the legend then shows exactly one entry per country.
    plt.plot(years, predicted, '--', color=actual_line.get_color())
plt.xlabel('Year')
plt.ylabel('GDP Growth Rate')
plt.title('GDP Growth Rate Prediction')
# Do not pass labels=le.classes_ here: it would be matched against every line
# in draw order (actual, predicted, actual, ...) and would also miscount
# countries missing from the test split, putting names on the wrong lines.
plt.legend(loc='best', scatterpoints=1)
plt.show()
如果还不行,请把数据文件发给我。