目的:使用2009-2021年的月度发病人数,预测2022年的月发病人数
数据格式如下所示:
问题:对2009年-2021年数据进行LSTM建模,然后再做预测,发现预测效果极差,请问有没有什么模型优化的方法。(或者如下代码有没有哪里有问题)
代码如下:
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.utils.vis_utils import plot_model
# Data preprocessing: load the CSV and convert its "value" column to float.
DA = read_csv("C:\\Users\\Administrator\\Desktop\\123.csv")
data = DA['value'].values.astype(float)

# Train/test split: the last `test_data_size` observations are held out as
# the test set; everything before them is used for training.
test_data_size = 12
train_data, test_data = data[:-test_data_size], data[-test_data_size:]

# Normalize to [-1, 1] to remove the effect of scale. The scaler is fitted
# on the training portion only, so the held-out values do not influence it.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
train_column = train_data.reshape(-1, 1)  # MinMaxScaler expects a 2-D array
train_data_normalized = scaler.fit_transform(train_column)

# Quick visual check: print the first five and the last five scaled values.
for edge in (slice(None, 5), slice(-5, None)):
    print(train_data_normalized[edge])
import torch
import torch.nn as nn
# The data is monthly, so each input sequence spans 12 observations.
train_window = 12
# Flatten the scaled (n, 1) column into a 1-D float tensor.
train_data_normalized = torch.FloatTensor(train_data_normalized).reshape(-1)
def create_inout_sequences(input_data, tw):
    """Slice a series into (window, next-value) training pairs.

    For every start index ``i`` with ``i + tw < len(input_data)`` a pair is
    produced: ``input_data[i:i + tw]`` as the input window and the
    one-element slice ``input_data[i + tw:i + tw + 1]`` as the label.

    Args:
        input_data: Any sliceable sequence that supports ``len()`` (the
            script passes a 1-D tensor; a plain list works as well).
        tw: Window length, i.e. the number of consecutive observations in
            each input sequence. Must be at least 1.

    Returns:
        A list of ``(train_seq, train_label)`` tuples. The list is empty
        when ``input_data`` holds no more than ``tw`` elements.

    Raises:
        ValueError: If ``tw`` is smaller than 1. A zero or negative window
            would otherwise silently yield empty or malformed pairs.
    """
    if tw < 1:
        raise ValueError(f"tw must be a positive integer, got {tw!r}")
    return [
        (input_data[start:start + tw], input_data[start + tw:start + tw + 1])
        for start in range(len(input_data) - tw)
    ]
# Build the (input window, next value) training pairs from the scaled series.
train_inout_seq = create_inout_sequences(
    input_data=train_data_normalized,
    tw=train_window,
)
# Build the LSTM model.
# NOTE: this class rebinds the name `LSTM` that the top of the file also
# imports from keras.layers; after this definition `LSTM` refers to the
# PyTorch module below.
class LSTM(nn.Module):
    """Single-layer LSTM followed by a linear read-out of the last time step.

    The recurrent state is kept on the instance as ``hidden_cell`` (a
    ``(h, c)`` tuple). ``forward`` both reads and overwrites it, so state
    carries over from one call to the next unless the caller assigns a
    fresh zero state to ``hidden_cell`` before calling the model.

    Args:
        input_size: Number of features per time step.
        hidden_layer_size: Size of the LSTM hidden state.
        output_size: Number of values produced by the linear layer.
    """

    def __init__(self, input_size=1, hidden_layer_size=200, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = self._zero_state()

    def _zero_state(self):
        """Return a fresh all-zero ``(h, c)`` pair for one layer, batch size 1."""
        return (torch.zeros(1, 1, self.hidden_layer_size),
                torch.zeros(1, 1, self.hidden_layer_size))

    def forward(self, input_seq):
        """Run the sequence through the LSTM and return the last prediction.

        Args:
            input_seq: Tensor whose first dimension is the time axis; it is
                reshaped to ``(len(input_seq), 1, -1)``, i.e. batch size 1.

        Returns:
            The linear layer's output for the final time step.
        """
        steps = len(input_seq)
        # The updated state is stored back on the instance (see class docstring).
        lstm_out, self.hidden_cell = self.lstm(
            input_seq.view(steps, 1, -1), self.hidden_cell
        )
        predictions = self.linear(lstm_out.view(steps, -1))
        return predictions[-1]
# Model, loss and optimizer.
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 200

# Without at least one training pair the loop body never runs and the loss
# reporting below would fail on an undefined `single_loss`.
if not train_inout_seq:
    raise ValueError(
        "train_inout_seq is empty: the training series must be longer "
        "than train_window"
    )

for i in range(epochs):
    # One optimizer step per (window, label) pair.
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        # Start every window from a zero state; the model keeps its
        # recurrent state in `hidden_cell` between calls otherwise.
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                             torch.zeros(1, 1, model.hidden_layer_size))
        y_pred = model(seq)
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    # Progress report every 25 epochs. The value shown is the loss of the
    # last window processed in the epoch, not an average over the epoch.
    if i % 25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

# Loss of the last window in the final epoch.
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')
# Forecast the next `fut_pred` values by feeding each prediction back in.
import numpy as np

fut_pred = 12
# Seed the rolling window with the last `train_window` scaled training values.
test_inputs = train_data_normalized[-train_window:].tolist()
print(test_inputs)

model.eval()
with torch.no_grad():
    for _ in range(fut_pred):
        seq = torch.FloatTensor(test_inputs[-train_window:])
        # Reset the recurrent state before every window, exactly as during
        # training. The attribute the model reads is `hidden_cell`;
        # assigning to `model.hidden` left the state from the previous
        # call in place, so each forecast step started from stale state.
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                             torch.zeros(1, 1, model.hidden_layer_size))
        test_inputs.append(model(seq).item())

# Recover the predictions: everything after the seed window, mapped back to
# the original scale with the scaler fitted on the training data.
scaled_forecast = np.array(test_inputs[train_window:]).reshape(-1, 1)
actual_predictions = scaler.inverse_transform(scaled_forecast)
print(actual_predictions)
“Devil组”引证GPT后的撰写: