问题描述(训练报错):RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x576 and 400x120)
(本人初步学习深度学习,望大家解答解答!)
以下为卷积神经网络:
class Net(nn.Module):
    """LeNet-style CNN mapping 3-channel images to 10 class scores.

    The first fully connected layer is sized for 28x28 inputs:
    conv1 (5x5 kernel, padding 2) keeps 28x28, the first pool halves it to
    14x14, conv2 (5x5 kernel, no padding) shrinks it to 10x10, and the second
    pool halves it to 5x5, giving 16 * 5 * 5 = 400 flattened features.
    Any other spatial size (e.g. 32x32 -> 16 * 6 * 6 = 576 features) makes
    ``fc1`` raise ``RuntimeError: mat1 and mat2 shapes cannot be multiplied``.
    """

    def __init__(self):
        super().__init__()
        # Convolution + pooling layers.
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, padding=2)
        # ReLU holds no parameters, so a single instance is reused after
        # every convolution and hidden linear layer.
        self.relu = nn.ReLU()
        self.avg_pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.avg_pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # Fully connected layers.
        self.flatten = nn.Flatten()
        # 16 channels * 5 * 5 spatial positions; only valid for 28x28 inputs.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, X):
        """Run the network on a batch of images.

        Args:
            X: Tensor of shape (batch, 3, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding the raw class scores.

        Raises:
            RuntimeError: If the spatial size of ``X`` does not flatten to
                400 features, because ``fc1`` cannot multiply the shapes.
        """
        # Convolution stage 1, followed by average pooling.
        X = self.relu(self.conv1(X))
        X = self.avg_pool1(X)
        # Convolution stage 2, followed by average pooling.
        X = self.relu(self.conv2(X))
        X = self.avg_pool2(X)
        # Classifier head.
        X = self.flatten(X)
        X = self.relu(self.fc1(X))
        X = self.relu(self.fc2(X))
        X = self.fc3(X)
        return X
以下为报错时的完整输出(Traceback):
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Input In [7], in <cell line: 2>()
1 lr, num_epochs=0.01, 5
----> 2 d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
File D:\DeepLearning\Anaconda_3\lib\site-packages\d2l\torch.py:510, in train_ch6(net, train_iter, test_iter, num_epochs, lr, device)
507 if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
508 animator.add(epoch + (i + 1) / num_batches,
509 (train_l, train_acc, None))
--> 510 test_acc = evaluate_accuracy_gpu(net, test_iter)
511 animator.add(epoch + 1, (None, None, test_acc))
512 print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
513 f'test acc {test_acc:.3f}')
File D:\DeepLearning\Anaconda_3\lib\site-packages\d2l\torch.py:472, in evaluate_accuracy_gpu(net, data_iter, device)
470 X = X.to(device)
471 y = y.to(device)
--> 472 metric.add(d2l.accuracy(net(X), y), d2l.size(y))
473 return metric[0] / metric[1]
File D:\DeepLearning\Anaconda_3\lib\site-packages\torch\nn\modules\module.py:1110, in Module._call_impl(self, *input, **kwargs)
1106 # If we don't have any hooks, we want to skip the rest of the logic in
1107 # this function, and just call forward.
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
Input In [4], in Net.forward(self, X)
23 X = self.avg_pool2(X)
25 X = self.flatten(X)
---> 26 X = self.relu(self.fc1(X))
27 X = self.relu(self.fc2(X))
28 X = self.fc3(X)
File D:\DeepLearning\Anaconda_3\lib\site-packages\torch\nn\modules\module.py:1110, in Module._call_impl(self, *input, **kwargs)
1106 # If we don't have any hooks, we want to skip the rest of the logic in
1107 # this function, and just call forward.
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
File D:\DeepLearning\Anaconda_3\lib\site-packages\torch\nn\modules\linear.py:103, in Linear.forward(self, input)
102 def forward(self, input: Tensor) -> Tensor:
--> 103 return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x576 and 400x120)
1.解决方法
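最开始我只将训练集 resize 为 28×28,而没有对测试集做同样的 resize,测试集图像仍保持原始尺寸。网络的第一个全连接层 fc1 是按 28×28 的输入设计的,期望展平后得到 16×5×5=400 个特征;而从报错信息中的 576=16×6×6 可以推出,测试图像进入网络时是 32×32,因此在评估测试集(evaluate_accuracy_gpu)时于第一个全连接层报错,无法衔接。最后将训练集和测试集统一 resize 为 28×28 后,测试代码成功运行。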
2.代码对比
修改前:
# Pre-fix pipelines (the configuration that triggers the error).
# Only the training pipeline resizes to 28x28; the test pipeline has no
# Resize step, so test images reach the network at their original size.
image_size = [28,28]
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)

train_steps = [
    torchvision.transforms.Resize(image_size),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(norm_mean, norm_std),
]
# No Resize here: this is the mismatch between the two splits.
test_steps = [
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(norm_mean, norm_std),
]

transform_train = torchvision.transforms.Compose(train_steps)
transform_test = torchvision.transforms.Compose(test_steps)
修改后:
# Post-fix pipelines: both splits are resized to the same 28x28 size, so
# train and test images enter the network with identical spatial dimensions.
image_size = [28,28]
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)

train_steps = [
    torchvision.transforms.Resize(image_size),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(norm_mean, norm_std),
]
# Same Resize as the training split; the random flip stays train-only.
test_steps = [
    torchvision.transforms.Resize(image_size),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(norm_mean, norm_std),
]

transform_train = torchvision.transforms.Compose(train_steps)
transform_test = torchvision.transforms.Compose(test_steps)