我用两层的lstm编码坐标,然后用三层感知器解码,预测后五帧的轨迹,用的是mse和adam,尝试了从0.00001到0.3的学习率,batch size也改过train loss一直在小幅度的波动,test loss一直不变,想请问出现这种情况是可能是什么原因?
import torch
import torch.nn as nn
import torch.utils.data as data
import numpy as np
import random
from torch import optim
from helper import AverageMeter, adjust_learning_rate
class net(torch.nn.Module):
def __init__(self):
super(net, self).__init__()
self.gru = nn.GRU(2, 128, 2)
self.lstm = nn.LSTM(2, 128, 2)
def forward(self, input1, hidden):
next_h_list = []
out_list = []
for i in range(20):
input_tr = input1[:,i,:].unsqueeze(0)
output, next_h = self.lstm(input_tr, (hidden[i][0] , hidden[i][1]))
next_h_list.append(next_h)
out_list.append(output.squeeze(0))
output_tr = torch.stack(out_list, 1)
return output_tr, next_h_list
def init_hidden(self, batch_size):
return [[torch.zeros(2, batch_size, 128, requires_grad=True).cuda() for _ in range(2)] for _ in range(20)]
#这里的维度(2,5,128)是从外向内的,最里面是128维
class decoder(torch.nn.Module):
def __init__(self):
super(decoder, self).__init__()
self.fc1 = torch.nn.Linear(128 , 2)
def forward(self, input):
de_list = []
for i in range(20):
output = self.fc1(input[:,i,:])
de_list.append(output)
out = torch.stack(de_list, 1)
return out
class Model:
def init(self):
self.lr = 0.1
self.weight_decay = 5e-3
self.n_epochs = 500
self.loss_list = []
self.time_window = 300
self.window_size = 1000
self.m1 = net().cuda()
self.m2 = decoder().cuda()
self.m1_optim = optim.Adam(self.m1.parameters(), lr = self.lr, weight_decay = self.weight_decay)
self.m2_optim = optim.Adam(self.m1.parameters(), lr = self.lr, weight_decay = self.weight_decay)
self.batch_size = 256
self.test_times = 5
self.dataload('data/GC.npz')
def dataload(self, path):
data = np.load(path)
tr_X, tr_Y = data['train_X'], data['train_Y']
te_X, te_Y = data['test_X'], data['test_Y']
tr_input = torch.FloatTensor(tr_X)
tr_target = torch.FloatTensor(tr_Y)
self.tr_input = tr_input.cuda()
self.tr_target = tr_target.cuda()
self.te_input = torch.FloatTensor(te_X).cuda()
self.te_target = torch.FloatTensor(te_Y).cuda()
# data loader
train = torch.utils.data.TensorDataset(tr_input, tr_target)
self.train_loader = torch.utils.data.DataLoader(train, batch_size=self.batch_size, shuffle=True, num_workers=4)
def run(self, tr_input, tr_target):
batch_size = tr_input.size(0)
encoder_hidden = self.m1.init_hidden(batch_size)
#print('batch_size: ',batch_size)
tr_final = tr_input[:,:,4]
#选择第五帧
for i in range(4):
output , hidden = self.m1(tr_input[:,:,i], encoder_hidden)
re_list = []
for i in range(5):
output , hidden = self.m1(tr_final, encoder_hidden)
output_decoder = self.m2(output)
tr_final = output_decoder
re_list.append(output_decoder)
predict = torch.stack(re_list, 2)
#loss
L2_loss = ((tr_target - predict) **2).sum() / 20
MSE_loss = ((tr_target - predict) **2).sum(3).sqrt().mean()
self.loss = L2_loss
return predict, L2_loss.item(), MSE_loss.item()
def train(self, epoch):
MSE_loss_meter = AverageMeter()
L2_square_loss_meter = AverageMeter()
adjust_learning_rate([self.m1_optim, self.m2_optim], self.lr, epoch)
for i ,(tr_input, tr_target)in enumerate(self.train_loader):
tr_input = tr_input.cuda()
tr_target = tr_target.cuda()
self.m1_optim.zero_grad()
self.m2_optim.zero_grad()
predict, L2_loss, MSE_loss = self.run(tr_input, tr_target)
MSE_loss_meter.update(MSE_loss)
L2_square_loss_meter.update(L2_loss)
self.loss.backward()
self.m1_optim.step()
self.m2_optim.step()
return MSE_loss_meter.avg, L2_square_loss_meter.avg
def test(self):
with torch.no_grad():
predi, L2_square_lo, MSE_lo = self.run(self.te_input, self.te_target)
return MSE_lo, L2_square_lo
def final(self, epoch):
self.init()
for i in range(1, epoch + 1):
MSE_loss, L2_square_loss = self.train(epoch)
print('----------------epoch------------------: ',i+1)
print('mse: ', MSE_loss)
print('l2: ', L2_square_loss)
self.loss_list.append(MSE_loss)
if i % self.test_times == 0:
test_loss_MSE , test_loss_L2 = self.test()
print('----TEST----\n' + 'MSE Loss:%s' % test_loss_MSE)
print('----TEST----\n' + 'L2 Loss:%s' % test_loss_L2)
def set_random_seed(random_seed=0):
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)
random.seed(random_seed)
def main():
set_random_seed()
M = Model()
M.final(1000)
if __name__ == '__main__':
main()
数据量是不是不够,造成学不动。把你的模型的神经元调大一些,层数少一些。