import torch
import torch.nn as nn
import torch.utils.data as Data
from torchvision import transforms
from torchvision.datasets import ImageFolder

train_data_transforms = transforms.Compose([
    transforms.Resize([224, 224]),          # transforms.Scale is deprecated; use Resize
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet mean/std
])

train_data_dir = 'C:\\Users\\yang\\Desktop\\ynn'
train_data = ImageFolder(train_data_dir, transform=train_data_transforms)
train_data_loader = Data.DataLoader(train_data, batch_size=16, shuffle=True)

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(        # LSTM works much better than nn.RNN() here
            input_size=224,        # number of pixels per image row
            hidden_size=256,       # rnn hidden units
            num_layers=1,          # number of stacked RNN layers
            batch_first=True,      # input & output put batch size first, e.g. (batch, time_step, input_size)
        )
        self.out = nn.Sequential(
            nn.Linear(256, 16),
            nn.ReLU(),
            nn.Linear(16, 3)
        )

    def forward(self, x):
        r_out, (h_n, h_c) = self.rnn(x, None)   # None: zero initial hidden state
        out = self.out(r_out[:, -1, :])         # classify from the last time step
        return out

rnn = RNN()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)   # optimize all parameters
loss_func = nn.CrossEntropyLoss()                         # the target label is not one-hot

# training and testing
for epoch in range(1):
    for step, (b_x, b_y) in enumerate(train_data_loader):  # gives batch data
        b_x = b_x.view(-1, 224, 224)   # reshape x to (batch, time_step, input_size)
        print(b_x.shape)
        output = rnn(b_x)
torch.Size([48, 224, 224])
It should output ([16, 224, 224]).
Not sure whether you have solved this already; if not:
Inputs
Inputs: input, h_0 (for nn.LSTM the second argument is actually the tuple (h_0, c_0))
input: the dimensions of input mean (seq_len, batch, input_size): seq_len is the sequence (sentence) length, batch is the number of sequences per batch, and input_size is the dimensionality of each step's feature vector (e.g. each word vector). With batch_first=True, as in your model, this becomes (batch, seq_len, input_size).
h_0: h_0 is optional (it defaults to zeros). Its dimensions mean (num_layers * num_directions, batch, hidden_size): num_layers is the number of stacked RNN layers, num_directions is the number of directions (2 for a bidirectional RNN, otherwise 1), batch is the number of sequences per batch, and hidden_size is the number of hidden units per layer.
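
To make these shapes concrete, here is a minimal sketch (the sizes are borrowed from your model; the tensors are random stand-ins, not your data). Note that your model sets batch_first=True, so the batch dimension comes first rather than seq_len:

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=224, hidden_size=256, num_layers=1, batch_first=True)
x = torch.randn(16, 224, 224)    # (batch, seq_len, input_size), since batch_first=True
h0 = torch.zeros(1, 16, 256)     # (num_layers * num_directions, batch, hidden_size)
c0 = torch.zeros(1, 16, 256)     # nn.LSTM also takes an initial cell state c_0
out, (hn, cn) = lstm(x, (h0, c0))
print(out.shape)                 # torch.Size([16, 224, 256]) = (batch, seq_len, num_directions * hidden_size)
print(hn.shape)                  # torch.Size([1, 16, 256])   = (num_layers * num_directions, batch, hidden_size)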
Outputs
Outputs: output, h_n (for nn.LSTM: output, (h_n, c_n))
output: its dimensions mean (seq_len, batch, num_directions * hidden_size), or (batch, seq_len, num_directions * hidden_size) with batch_first=True.
h_n: its dimensions mean (num_layers * num_directions, batch, hidden_size).
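
These shape rules also explain your print: ImageFolder + ToTensor yields RGB batches of shape (16, 3, 224, 224), and b_x.view(-1, 224, 224) folds the 3 color channels into the batch dimension, giving 16 * 3 = 48. To get the [16, 224, 224] you expect, remove the channel dimension first. A hedged sketch of two options (the tensors below are random stand-ins for your data):

import torch

b_x = torch.randn(16, 3, 224, 224)       # stand-in for one RGB batch from train_data_loader

# Option A (sketch): average the RGB channels, one 224x224 "sequence" per image.
b_x_gray = b_x.mean(dim=1)               # (16, 3, 224, 224) -> (16, 224, 224)
print(b_x_gray.shape)                    # torch.Size([16, 224, 224])

# Option B (sketch): load single-channel images by putting
# transforms.Grayscale(num_output_channels=1) before ToTensor() in the Compose
# (Normalize then needs single-channel mean/std). The loader yields
# (16, 1, 224, 224), so drop the channel dimension instead of view(-1, ...):
single = torch.randn(16, 1, 224, 224)    # what the loader yields with Grayscale
print(single.squeeze(1).shape)           # torch.Size([16, 224, 224])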