将res2net添加到yolov5网络后,训练好的权重13.4MB比resnet的权重14.4MB还要小,这是正常的吗?
根据参考资料和已有知识,可以知道res2net是在resnet基础上进行了改进,并且通常来说res2net相比resnet更轻量化,所以得到的权重大小也相应会更小。
具体而言,res2net是将resnet的单个通道拆分成多个分支,每个分支后面跟着不同的卷积层,并将分支结果合并起来,这样可以增加网络的宽度和深度,提升模型的表现。而resnet则是通过残差块的方式将输入绕过卷积层,以保留更多的特征信息,减轻梯度消失问题。
所以,从理论上来说,res2net相比resnet更轻量化是有可能的。至于具体的权重大小不同,应该是由于网络结构和参数设置的不同导致的,具体的验证需要在相同的条件下进行实验对比。
实现代码:
# 加载相关库
import torch
import torch.nn as nn
import torch.nn.functional as F
# Define the Res2Net-style residual block.
# NOTE(review): this approximates the Res2Net hierarchical split with a single
# grouped 3x3 convolution (groups=scale); it is NOT the original multi-branch
# Res2Net module described in the paper — confirm this is intentional.
class Res2Block(nn.Module):
    """Bottleneck residual block: 1x1 reduce -> grouped 3x3 -> 1x1 expand.

    Args:
        in_channels:  number of input feature channels.
        mid_channels: bottleneck width; must be divisible by ``scale``
                      because the 3x3 conv uses ``groups=scale``.
        out_channels: number of output feature channels.
        stride:       stride of the 3x3 conv (downsamples when > 1).
        scale:        number of groups for the 3x3 conv.

    Raises:
        ValueError: if ``mid_channels`` is not divisible by ``scale``
                    (nn.Conv2d would otherwise fail with an opaque error).
    """

    def __init__(self, in_channels, mid_channels, out_channels, stride=1, scale=4):
        super(Res2Block, self).__init__()
        # Fail fast with a clear message instead of a cryptic conv error.
        if mid_channels % scale != 0:
            raise ValueError(
                "mid_channels ({}) must be divisible by scale ({})".format(
                    mid_channels, scale))
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            # Grouped conv: each of the `scale` groups convolves its own slice.
            nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, groups=scale, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(out_channels)
        )
        # Identity shortcut unless shape changes (stride or channel count),
        # in which case a 1x1 projection matches the residual's shape.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        """Return ReLU(residual(x) + shortcut(x))."""
        out = self.residual_function(x)
        out += self.shortcut(x)
        return F.relu(out)
# Define the Res2Net classification model.
class Res2Net(nn.Module):
    """Small Res2Net-style classifier (stem -> 4 stages -> GAP -> fc).

    The head is a 10-way linear classifier (matches Fashion-MNIST below).
    Input is expected to be a 3-channel image batch (N, 3, H, W).
    """

    def __init__(self):
        super(Res2Net, self).__init__()
        # Stem: 3x3 conv keeps spatial size, lifts 3 -> 32 channels.
        self.conv = nn.Conv2d(3, 32, 3, 1, 1, bias=False)
        self.bn = nn.BatchNorm2d(32)
        # Four stages; stages 2-4 downsample by 2 via their first block.
        self.layer1 = self._make_layer(32, 16, 64, 3, 1, scale=4)
        self.layer2 = self._make_layer(64, 32, 128, 4, 2, scale=4)
        self.layer3 = self._make_layer(128, 64, 256, 6, 2, scale=4)
        self.layer4 = self._make_layer(256, 128, 512, 3, 2, scale=4)
        # Global average pool makes the head input-size agnostic.
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512, 10)

    def forward(self, x):
        """Return class logits of shape (N, 10)."""
        x = F.relu(self.bn(self.conv(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avg(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def _make_layer(self, in_channels, mid_channels, out_channels, num_blocks, stride, scale=4):
        """Build one stage: first block may downsample, the rest keep shape.

        BUGFIX: the first block previously dropped ``scale`` (it fell back to
        the Res2Block default), so a non-default ``scale`` was silently
        ignored for every stage's leading block.
        """
        layers = []
        layers.append(Res2Block(in_channels, mid_channels, out_channels, stride, scale=scale))
        for i in range(1, num_blocks):
            layers.append(Res2Block(out_channels, mid_channels, out_channels, scale=scale))
        return nn.Sequential(*layers)
# Load data, train, evaluate, and report model size.
import d2lzh as d2l

# BUGFIX: batch_size was previously undefined when passed to train_ch5
# (NameError at runtime); define it once and reuse it everywhere.
batch_size = 32
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size, resize=96)
net = Res2Net()
lr, num_epochs = 0.05, 10
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer,
              device=device, num_epochs=num_epochs)

# Report parameter count as float32 storage (4 bytes per parameter).
# NOTE(review): a saved checkpoint is slightly larger than this estimate
# (it also stores buffers such as BatchNorm running stats and metadata).
total_params = sum(p.numel() for p in net.parameters())
print("Model Size is {:.2f} MB".format(total_params * 4 / 1024 / 1024))