pytorch训练报错:RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x576 and 400x120)

问题描述(训练报错):RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x576 and 400x120)
(本人初步学习深度学习,望大家解答解答!)
以下为卷积神经网络:


class Net(nn.Module):
    """LeNet-style CNN for 3-channel 28x28 inputs (e.g. resized CIFAR-10).

    Spatial sizes for a 28x28 input:
        conv1 (k=5, pad=2): 28x28 -> avg_pool1: 14x14
        conv2 (k=5, no pad): 10x10 -> avg_pool2: 5x5
    so the flattened feature size is 16*5*5 = 400, matching ``fc1``.

    NOTE: a 32x32 input flattens to 16*6*6 = 576 instead, and ``fc1``
    raises ``RuntimeError: mat1 and mat2 shapes cannot be multiplied`` --
    resize ALL dataset splits (train AND test) to 28x28 before use.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolution + pooling feature extractor.
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, padding=2)
        # Fix: the original assigned self.relu twice; ReLU is stateless,
        # so one shared instance is enough.
        self.relu = nn.ReLU()
        self.avg_pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.avg_pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # Fully connected classifier head: 400 -> 120 -> 84 -> 10.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, X):
        """Return (batch, 10) class scores for X of shape (batch, 3, 28, 28)."""
        # Stage 1: conv -> relu -> average pool.
        X = self.avg_pool1(self.relu(self.conv1(X)))
        # Stage 2: conv -> relu -> average pool.
        X = self.avg_pool2(self.relu(self.conv2(X)))
        # Flatten to (batch, 400) and classify.
        X = self.flatten(X)
        X = self.relu(self.fc1(X))
        X = self.relu(self.fc2(X))
        return self.fc3(X)

以下为每层的输出显示:

img


训练报错详细:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [7], in <cell line: 2>()
      1 lr, num_epochs=0.01, 5
----> 2 d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

File D:\DeepLearning\Anaconda_3\lib\site-packages\d2l\torch.py:510, in train_ch6(net, train_iter, test_iter, num_epochs, lr, device)
    507         if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
    508             animator.add(epoch + (i + 1) / num_batches,
    509                          (train_l, train_acc, None))
--> 510     test_acc = evaluate_accuracy_gpu(net, test_iter)
    511     animator.add(epoch + 1, (None, None, test_acc))
    512 print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
    513       f'test acc {test_acc:.3f}')

File D:\DeepLearning\Anaconda_3\lib\site-packages\d2l\torch.py:472, in evaluate_accuracy_gpu(net, data_iter, device)
    470             X = X.to(device)
    471         y = y.to(device)
--> 472         metric.add(d2l.accuracy(net(X), y), d2l.size(y))
    473 return metric[0] / metric[1]

File D:\DeepLearning\Anaconda_3\lib\site-packages\torch\nn\modules\module.py:1110, in Module._call_impl(self, *input, **kwargs)
   1106 # If we don't have any hooks, we want to skip the rest of the logic in
   1107 # this function, and just call forward.
   1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110     return forward_call(*input, **kwargs)
   1111 # Do not call functions when jit is used
   1112 full_backward_hooks, non_full_backward_hooks = [], []

Input In [4], in Net.forward(self, X)
     23 X = self.avg_pool2(X)
     25 X = self.flatten(X)
---> 26 X = self.relu(self.fc1(X))
     27 X = self.relu(self.fc2(X))
     28 X = self.fc3(X)

File D:\DeepLearning\Anaconda_3\lib\site-packages\torch\nn\modules\module.py:1110, in Module._call_impl(self, *input, **kwargs)
   1106 # If we don't have any hooks, we want to skip the rest of the logic in
   1107 # this function, and just call forward.
   1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110     return forward_call(*input, **kwargs)
   1111 # Do not call functions when jit is used
   1112 full_backward_hooks, non_full_backward_hooks = [], []

File D:\DeepLearning\Anaconda_3\lib\site-packages\torch\nn\modules\linear.py:103, in Linear.forward(self, input)
    102 def forward(self, input: Tensor) -> Tensor:
--> 103     return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x576 and 400x120)

img


你这贴出来的代码和你实际运行的代码不一致啊。这里应该是你的X = X.view(X.size(0), -1)这句结果不对,下一层fc1要求你输入的格式为[batch,5x5x16],也就是第二维度的通道数要为400,你自己看你view之后的格式是否为batch,400
如果你的网络设计就是这样的话,报错里的 576 = 16×6×6 对应的是 32×32 的输入(即未 resize 的测试集);只有把输入 resize 成 28×28,展平后才是 16×5×5=400,才能与 fc1 衔接。

1.解决方法
最开始我只将训练集resize为28×28,而没有将测试集resize为28×28,导致测试集经过神经网络时在第一个全连接层报错,无法衔接。最后将所有数据集统一resize后测试代码成功运行。
2.代码对比
修改前:

# Training transform: resizes to 28x28 so the flattened feature size
# downstream matches fc1's expected 16*5*5 = 400 inputs.
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.Resize([28,28]),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# BUG ("before" version): no Resize here, so test images keep their
# original size (presumably 32x32 CIFAR-10, given the Normalize stats);
# 32x32 flattens to 16*6*6 = 576 and fc1 (expecting 400) raises the
# mat1/mat2 shape-mismatch RuntimeError during evaluation.
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

修改后:

# Training transform: unchanged from the broken version.
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.Resize([28,28]),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# FIX ("after" version): Resize added so test images also become 28x28,
# matching the input size the network's fc1 layer was built for.
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.Resize([28,28]),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])