LSTM predictions come out constant; a question about output dimensions

Symptoms and background

When I plot the validation-set predictions from my LSTM model against the true values, the figure shows many lines, the predicted values are constant (flat), and the x-axis length does not equal the number of validation samples. I would appreciate step-by-step guidance. Below is the code starting from the data-loading step; please take a look.

import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from math import ceil
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

data_path = "C:/Users/azdlbj/Desktop.xlsx"
data = pd.read_excel(data_path, sheet_name="Sheet1") 
data = data.loc[:, ["前1(mg/L)", "1(mg/L)","O","R","温度","进水","水","总","T"]]  
print(data.head())
print(len(data))

train_data_ratio = 0.8
train_data_size = ceil(train_data_ratio * len(data))
train_data = data.iloc[:train_data_size]
eval_data = data.iloc[train_data_size:]
seq_len = 5


# Dataset definition
class SeqDataset(Dataset):

    def __init__(self, data, seq_len):
        super().__init__()
        self.x = data.loc[:, ["前1(mg/L)", "1(mg/L)","R","温度","进水","水","总","T"]].values  # feature (x) column names go inside the brackets
        self.y = data.loc[:, "O"].values
        self.seq_len = seq_len  # sequence length
        self.data_size = len(data)

    def __len__(self):
        return self.data_size - self.seq_len + 1

    def __getitem__(self, index):
        # the index-to-window mapping here may need to be rewritten
        start_index, end_index = index, index + self.seq_len
        return self.x[start_index:end_index], self.y[start_index:end_index]


class LstmRNN(nn.Module):
    """
        Parameters:
        - input_size: feature size
        - hidden_size: number of hidden units
        - output_size: number of output
        - num_layers: layers of LSTM to stack
    """

    def __init__(self, input_size, hidden_size=1, output_size=1, num_layers=3):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)  # utilize the LSTM model in torch.nn
        self.linear1 = nn.Linear(hidden_size, output_size)  # fully connected layer

    def forward(self, _x):

        _x = _x.permute(1, 0, 2).contiguous()  # (batch, seq_len, feature) -> (seq_len, batch, feature)
        s, b, h = _x.shape  # _x is the input here, size (seq_len, batch, input_size)
        # an nn.Linear plus an activation function could be added on the input x here
        h0 = torch.zeros(self.num_layers, b, self.hidden_size)
        c0 = torch.zeros(self.num_layers, b, self.hidden_size)
        x, _ = self.lstm(_x, (h0, c0))  # _x is input, size (seq_len, batch, input_size)
        s, b, h = x.shape  # x is output, size (seq_len, batch, hidden_size)
        x = x.contiguous().view(s * b, h)
        x = self.linear1(x)
        x = x.view(s, b, 1)
        x = x.permute(1, 0, 2).contiguous()
        x = x.squeeze(-1)
        return x

# Model training and evaluation
batch_size = 64  # number of samples used in one iteration
n_epoch = 100  # one epoch is a full pass over all the data; n_epoch is the total number of passes
train_dataset = SeqDataset(train_data, seq_len)
eval_dataset = SeqDataset(eval_data, seq_len)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
eval_loader = DataLoader(
    dataset=eval_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

# could be replaced with loading a saved model
model = LstmRNN(input_size=8, hidden_size=64, output_size=1)  # input_size is the number of x variables; batch_size can be 32, 64 or 128

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2) # 1e-2 = 0.01  7e-2 = 0.07

min_eval_loss = float("inf")
for id_epoch in range(n_epoch):
    train_pbar = tqdm(train_loader)
    eval_pbar = tqdm(eval_loader)

    # train the model
    model.train()
    for (x, y) in train_pbar:
        x = x.float()
        y = y.float()

        pred = model(x)
        loss = criterion(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_pbar.set_description(f"[train], loss: {loss}")

    # evaluate the model
    eval_loss = 0

    with torch.no_grad():
        for (x, y) in eval_pbar:
            x = x.float()
            y = y.float()
            pred = model(x)
            loss = torch.mean(torch.abs(pred - y))

            eval_pbar.set_description(f"[eval], loss: {loss}")
            eval_loss += loss * x.shape[0]

        eval_loss /= len(eval_dataset)
        print(f"({id_epoch}/{n_epoch}), loss: {eval_loss}")
        min_eval_loss = min(eval_loss, min_eval_loss)

        # save the model here
print("min eval loss:", min_eval_loss)
plt.plot(pred)
plt.plot(y)
plt.legend(['true','pred'])
plt.show()
Run results and error messages

[screenshot: plot of validation-set predictions vs. true values produced by the code above]

The result I want to achieve

[screenshot: example of the desired single prediction-vs-true curve]

At a glance, the plotting code looks wrong. Shouldn't you be collecting x and pred at every step inside the for loop into arrays, and then plotting those arrays at the end? I'd also suggest sharing a sample of your data to make debugging easier.
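A minimal sketch of that idea, reusing the names from the code above (model, eval_loader, plt); keeping only the last time step of each window is my assumption about how to align one prediction per validation sample:

all_preds, all_trues = [], []
model.eval()
with torch.no_grad():
    for (x, y) in eval_loader:
        x = x.float()
        y = y.float()
        pred = model(x)                # shape (batch, seq_len)
        all_preds.append(pred[:, -1])  # last step of each window -> one point per sample
        all_trues.append(y[:, -1])
preds = torch.cat(all_preds).numpy()
trues = torch.cat(all_trues).numpy()

plt.plot(trues, label="true")
plt.plot(preds, label="pred")
plt.legend()
plt.show()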

The LSTM constructor takes input_size, hidden_size, num_layers, and bidirectional (among other arguments).

input_size is the last dimension of the input sequence: for an input of shape [8, 56, 768], pass input_size=768.

hidden_size can be thought of as the last dimension of the output sequence.

num_layers is the number of stacked LSTM layers.

bidirectional is a boolean: True means a bidirectional LSTM, False a unidirectional one.

For example, if I want an [8, 56, 768] tensor to keep the same shape after passing through the LSTM, I set:

a=torch.randn(8,56,768)
lstm=torch.nn.LSTM(768,384,10,bidirectional=True, batch_first=True)
out,(h,c)=lstm(a)
print("out:",out.size())
Why set hidden_size to 384 here? Because with a bidirectional LSTM the output's last dimension is doubled (2 × hidden_size).

With a unidirectional LSTM, set hidden_size to 768 instead, as in the sketch below.
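A quick check of the unidirectional case, under the same assumptions as the example above:

import torch

a = torch.randn(8, 56, 768)
lstm = torch.nn.LSTM(768, 768, 10, bidirectional=False, batch_first=True)  # hidden_size = 768 keeps the last dim
out, (h, c) = lstm(a)
print("out:", out.size())  # torch.Size([8, 56, 768])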