I wrote the following program in Python:
# model.py
import torch
from torch import nn
from torchvision import models
# Decoder block
class DecoderBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size):
        """
        in_channels: number of input channels
        out_channels: number of output channels
        kernel_size: convolution kernel size
        """
        super(DecoderBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels, in_channels // 4, kernel_size, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(in_channels // 4)
        self.relu1 = nn.ReLU(inplace=True)
        # Transposed convolution: upsamples the feature map by a factor of 2
        self.deconv = nn.ConvTranspose2d(
            in_channels // 4,
            in_channels // 4,
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(in_channels // 4)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv2d(
            in_channels // 4,
            out_channels,
            kernel_size=kernel_size,
            padding=1,
            bias=False,
        )
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu3 = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.deconv(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        return x

class ResNet101Unet(nn.Module):
    def __init__(self, num_classes=2, pretrained=True):
        super(ResNet101Unet, self).__init__()
        # base is deliberately not a class attribute, so saving the model
        # does not also save the unused parts of the backbone
        base = models.resnet101(weights=models.ResNet101_Weights.IMAGENET1K_V1)
        # Copy the stem of the ResNet into ResNet101Unet
        self.firstconv = base.conv1
        self.firstbn = base.bn1
        self.firstrelu = base.relu
        self.firstmaxpool = base.maxpool
        # Use the ResNet layers as the encoder
        self.encoder1 = base.layer1
        self.encoder2 = base.layer2
        self.encoder3 = base.layer3
        self.encoder4 = base.layer4
        # Output channel counts of the decoder
        out_channels = [64, 128, 256, 512]
        # Build the decoder from DecoderBlocks
        self.center = DecoderBlock(
            in_channels=out_channels[3],
            out_channels=out_channels[3],
            kernel_size=3,
        )
        self.decoder4 = DecoderBlock(
            in_channels=out_channels[3] + out_channels[2],
            out_channels=out_channels[2],
            kernel_size=3,
        )
        self.decoder3 = DecoderBlock(
            in_channels=out_channels[2] + out_channels[1],
            out_channels=out_channels[1],
            kernel_size=3,
        )
        self.decoder2 = DecoderBlock(
            in_channels=out_channels[1] + out_channels[0],
            out_channels=out_channels[0],
            kernel_size=3,
        )
        self.decoder1 = DecoderBlock(
            in_channels=out_channels[0] + out_channels[0],
            out_channels=out_channels[0],
            kernel_size=3,
        )
        # A final convolution stack maps the features back to the input
        # image's spatial size with num_classes output channels
        self.finalconv = nn.Sequential(
            nn.Conv2d(out_channels[0], 32, 3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Dropout2d(0.1, False),
            nn.Conv2d(32, num_classes, 1),
        )

    def forward(self, x):
        # Stem
        x = self.firstconv(x)
        x = self.firstbn(x)
        x = self.firstrelu(x)
        x_ = self.firstmaxpool(x)
        # Encoder: downsampling path
        e1 = self.encoder1(x_)
        e2 = self.encoder2(e1)
        e3 = self.encoder3(e2)
        e4 = self.encoder4(e3)
        # Decoder: upsampling path with skip connections
        center = self.center(e4)
        d4 = self.decoder4(torch.cat([center, e3], 1))
        d3 = self.decoder3(torch.cat([d4, e2], 1))
        d2 = self.decoder2(torch.cat([d3, e1], 1))
        d1 = self.decoder1(torch.cat([d2, x], 1))
        # Output segmentation map
        f = self.finalconv(d1)
        return f

if __name__ == "__main__":
    net = ResNet101Unet()
    img = torch.rand(1, 3, 320, 320)
    out = net(img)
    print(out.shape)
Running it produces the following error:
Given groups=1, weight of size [128, 512, 3, 3], expected input[1, 2048, 10, 10] to have 512 channels, but got 2048 channels instead
The input channel count configured for the convolution does not match the actual input:
weight of size [128, 512, 3, 3], expected input[1, 2048, 10, 10]
The second value in each must agree: either both should be 512 or both 2048.
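You can confirm where the 2048 comes from by pushing a dummy tensor through a bare ResNet101's stages and printing the shapes. A minimal standalone check (weights=None is used only to skip the pretrained download; random weights suffice for a shape check):

import torch
from torchvision import models

base = models.resnet101(weights=None)
x = torch.rand(1, 3, 320, 320)
x = base.maxpool(base.relu(base.bn1(base.conv1(x))))
for name in ["layer1", "layer2", "layer3", "layer4"]:
    x = getattr(base, name)(x)
    print(name, tuple(x.shape))
# layer1 (1, 256, 80, 80)
# layer2 (1, 512, 40, 40)
# layer3 (1, 1024, 20, 20)
# layer4 (1, 2048, 10, 10)  <- this is what self.center actually receives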
This answer references GPT (OpenAI):
This problem is caused by a parameter mismatch in the model. One of its convolution layers has a weight of size [128, 512, 3, 3], meaning it expects 512 input channels, but the feature map fed into it has 2048 channels, hence the dimension-mismatch error.
You can trace which line raises the error, or compare print(net) (which lists each layer's configured channels) against the actual tensor shapes, to locate the mismatch.
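For example, forward hooks can print each encoder stage's output shape right up to the point of failure. An illustrative sketch (a hypothetical debug script, not part of model.py) using the ResNet101Unet defined above:

# debug_shapes.py -- hypothetical helper script
import torch
from model import ResNet101Unet

net = ResNet101Unet()
# Print the output shape of every encoder stage; the crash then happens in
# the first decoder layer executed after the last shape printed.
for name in ["encoder1", "encoder2", "encoder3", "encoder4"]:
    getattr(net, name).register_forward_hook(
        lambda module, inputs, output, name=name: print(name, tuple(output.shape))
    )
try:
    net(torch.rand(1, 3, 320, 320))
except RuntimeError as e:
    print("failed after the stages above:", e)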
Suggested fix:
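ResNet101 is built from Bottleneck blocks with an expansion factor of 4, so layer1 through layer4 output 256, 512, 1024, and 2048 channels rather than the 64, 128, 256, 512 of the BasicBlock models (ResNet18/34). A minimal sketch of the corrected decoder configuration inside __init__ (the name filters and the explicit 64 for the stem skip connection are my own choices):

        # Channel counts of ResNet101's Bottleneck stages (layer1..layer4)
        filters = [256, 512, 1024, 2048]
        self.center = DecoderBlock(
            in_channels=filters[3], out_channels=filters[3], kernel_size=3
        )
        self.decoder4 = DecoderBlock(
            in_channels=filters[3] + filters[2], out_channels=filters[2], kernel_size=3
        )
        self.decoder3 = DecoderBlock(
            in_channels=filters[2] + filters[1], out_channels=filters[1], kernel_size=3
        )
        self.decoder2 = DecoderBlock(
            in_channels=filters[1] + filters[0], out_channels=filters[0], kernel_size=3
        )
        # The last skip connection is the 64-channel stem output (x before maxpool)
        self.decoder1 = DecoderBlock(
            in_channels=filters[0] + 64, out_channels=64, kernel_size=3
        )
        self.finalconv = nn.Sequential(
            nn.Conv2d(64, 32, 3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Dropout2d(0.1, False),
            nn.Conv2d(32, num_classes, 1),
        )

With these channel counts every torch.cat in forward lines up with its decoder's in_channels, and the test at the bottom of model.py should print torch.Size([1, 2, 320, 320]).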