GoogLeNet implemented in PyTorch, trained on the CIFAR-10 dataset.
Training loss and accuracy both look normal. The test loss is also close to the training loss, but the test accuracy stays roughly at the level of a freshly initialized network, i.e. extremely low.
The results are shown in the figure below:
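One quick check I can think of (a sketch, using the net, device, and test_iter defined in the full code below): run the same test batch through the model in train mode and in eval mode and compare the outputs, to see whether some layer or branch behaves differently between the two modes:

feature, label = next(iter(test_iter))
feature = feature.to(device)
with torch.no_grad():
    net.train()
    out_train = net(feature)[0]  # in train mode forward() returns (logits, aux1, aux2)
    net.eval()
    out_eval = net(feature)  # in eval mode forward() returns only the main output
print(out_train[:2])
print(out_eval[:2])
net.train()  # restore training mode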
Training code:
num_epoch1 = 20
train_loss = []
train_accuracy = []
for epoch in np.arange(num_epoch1):
    torch.cuda.empty_cache()  # release cached GPU memory
    net.train()
    loss_sum = 0.0
    accuracy_sum = 0.0
    n = 0
    for id_iter, (feature, label) in enumerate(train_iter):
        feature, label = feature.to(device), label.to(device)
        optimizer.zero_grad()
        logits, aux_logits1, aux_logits2 = net(feature)
        output = 0.6 * logits + 0.2 * aux_logits1 + 0.2 * aux_logits2
        loss0 = loss_func(logits, label)
        loss1 = loss_func(aux_logits1, label)
        loss2 = loss_func(aux_logits2, label)
        loss = loss0 + 0.3 * loss1 + 0.3 * loss2
        loss.backward()
        optimizer.step()
        loss_sum += (loss * label.size(0)).item()
        accuracy = (output.argmax(dim=1) == label).sum().item()
        accuracy_sum += accuracy
        n += label.size(0)
        print("Epoch[%s/%s], step[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch1, id_iter, len(train_iter), loss.item(), accuracy / label.size(0)))
    train_loss.append(loss_sum / n)
    train_accuracy.append(accuracy_sum / n)
    print("Epoch[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch1, train_loss[epoch], train_accuracy[epoch]))
Test code:
num_epoch2 = 20
test_loss = []
test_accuracy = []
with torch.no_grad():
    for epoch in np.arange(num_epoch2):
        net.eval()
        loss_sum = 0.0
        accuracy_sum = 0.0
        n = 0
        for id_iter, (feature, label) in enumerate(test_iter):
            feature, label = feature.to(device), label.to(device)
            output = net(feature)
            loss = loss_func(output, label); print("output=%s\nlabel=%s" % (output, label))
            loss_sum += (loss * label.size(0)).item()
            accuracy = (output.argmax(dim=1) == label).sum().item(); print("accuracy=", accuracy)
            accuracy_sum += accuracy; print("accuracy_sum=", accuracy_sum)
            n += label.size(0); print("n=", n)
            print("Epoch[%s/%s], step[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch2, id_iter, len(test_iter), loss.item(), accuracy / label.size(0)))
        test_loss.append(loss_sum / n)
        test_accuracy.append(accuracy_sum / n)
        print("Epoch[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch2, test_loss[epoch], test_accuracy[epoch]))
Printing the test-set outputs shows something clearly wrong (all 64 rows of the batch are the same vector), but I can't tell where the problem is:
output=tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], device='cuda:0')
label=tensor([1, 6, 9, 6, 4, 0, 6, 1, 9, 5, 4, 8, 5, 3, 7, 2, 8, 1, 9, 0, 7, 8, 7, 5,
3, 3, 2, 7, 3, 2, 0, 3, 4, 5, 0, 8, 8, 3, 0, 3, 3, 4, 9, 3, 8, 1, 3, 1,
2, 5, 3, 4, 7, 0, 4, 0, 6, 1, 7, 1, 4, 0, 6, 2], device='cuda:0')
accuracy= 7
accuracy_sum= 7.0
n= 64
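Every row of output is an exact one-hot vector, which suggests that the scores feeding the final nn.Softmax are huge in magnitude. One way to inspect the raw scores before the Softmax (a sketch, relying on the GoogLeNet definition below, where self.fc = Sequential(Linear, Softmax), so net.fc[0] is the Linear layer):

def show_pre_softmax(module, inp, out):
    # forward hook: out is the Linear layer's output, i.e. the scores before Softmax
    print("pre-softmax scores:", out[:2])

handle = net.fc[0].register_forward_hook(show_pre_softmax)
with torch.no_grad():
    net.eval()
    feature, _ = next(iter(test_iter))
    net(feature.to(device))
handle.remove()  # detach the hook when done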
Full code:
import os
import gc
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.utils.data as Data
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
#----------------------------------------------------------------------------------------------------------------------#
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device=', device)
print(torch.cuda.is_available())
#-------------------------------------------------Data processing--------------------------------------------------------------#
train_trans = torchvision.transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(0.3),  # horizontally flip the PIL image with probability p
    transforms.RandomCrop(224),  # random crop to the given size (a no-op here, since the image is already 224x224)
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
test_trans = torchvision.transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
# CIFAR-10: images are (3, 32, 32), labels are 10 classes (0-9); 50000 training images and 10000 test images
train_datasets = torchvision.datasets.CIFAR10('./Datasets/CIFAR', train=True, download=True, transform=train_trans)
test_datasets = torchvision.datasets.CIFAR10('./Datasets/CIFAR', train=False, download=True, transform=test_trans)
# keep 5000 training images and 1000 test images
train_X = []
train_y = []
for i in range(5000):
    X = train_datasets[i][0]
    train_X.append(X)
    y = train_datasets[i][1]
    train_y.append(y)
train_datasets = [(X, y) for X, y in zip(train_X, train_y)]
test_X = []
test_y = []
for i in range(1000):
    X = test_datasets[i][0]
    test_X.append(X)
    y = test_datasets[i][1]
    test_y.append(y)
test_datasets = [(X, y) for X, y in zip(test_X, test_y)]
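# Equivalent subset selection via torch.utils.data.Subset (a sketch, as an alternative
# to the copy loops above). One behavioral difference: Subset keeps the dataset's
# transform, so the random augmentations above would be re-applied every epoch,
# whereas the copy loops freeze a single augmented version of each image.
# train_datasets = Data.Subset(train_datasets, range(5000))
# test_datasets = Data.Subset(test_datasets, range(1000))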
batch_size = 64
train_iter = Data.DataLoader(train_datasets, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = Data.DataLoader(test_datasets, batch_size=batch_size, shuffle=True, num_workers=0)
# check sample shapes
# for i, (X, y) in enumerate(train_iter):
#     print("i=%d | X.size()=%s, y.size()=%s" % (i, X.size(), y.size()))  # X.size()=torch.Size([64, 3, 224, 224]), y.size()=torch.Size([64])
#     print("i=%d | X=%s\n y=%s" % (i, X, y))
#------------------------------------------------Network definition---------------------------------------------------------------#
class Inception(nn.Module):
    def __init__(self, in_channels, c1, c2, c3, c4):  # c1-c4 are the output channel counts of the four branches; c2 and c3 are (reduce, out) tuples
        super(Inception, self).__init__()
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        self.p4_1 = nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(F.relu(self.p4_1(x))))
        return torch.cat([p1, p2, p3, p4], dim=1)  # concatenate along dim=1, i.e. the channel dimension
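# Quick shape check for a single Inception block (a sketch):
# blk = Inception(192, 64, (96, 128), (16, 32), 32)
# print(blk(torch.rand(2, 192, 28, 28)).size())  # torch.Size([2, 256, 28, 28]); 64+128+32+32 = 256 channels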
class InceptionAux(nn.Module):
    def __init__(self, in_channel, num_class):
        super(InceptionAux, self).__init__()
        self.avg_pool = nn.AvgPool2d(kernel_size=5, stride=3)  # output (N, 512, 4, 4) / (N, 528, 4, 4)
        self.conv = nn.Conv2d(in_channel, 128, kernel_size=1)  # output (N, 128, 4, 4)
        self.fc1 = nn.Linear(2048, 1024)  # 128 * 4 * 4 = 2048; output (N, 1024)
        self.fc2 = nn.Linear(1024, num_class)  # output (N, 10)

    def forward(self, x):
        x = self.avg_pool(x)
        x = self.conv(x)
        x = torch.flatten(x, 1)  # output (N, 2048)
        x = self.fc1(x)
        x = F.relu(x, inplace=True)
        x = self.fc2(x)
        return x
class GoogLeNet(nn.Module):
    def __init__(self, num_class, aux_logits=False, init_weights=False):  # aux_logits: whether to use the auxiliary classifiers (True for training, False for inference)
        super(GoogLeNet, self).__init__()
        self.aux_logits = aux_logits
        # input (N, 3, 224, 224)
        self.b1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),  # output (N, 64, 112, 112)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # output (N, 64, 56, 56)
        )
        self.b2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=1),  # output (N, 64, 56, 56)
            nn.ReLU(),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),  # output (N, 192, 56, 56)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # output (N, 192, 28, 28)
        )
        self.b3 = nn.Sequential(
            Inception(192, 64, (96, 128), (16, 32), 32),  # output (N, 256, 28, 28)
            Inception(256, 128, (128, 192), (32, 96), 64),  # output (N, 480, 28, 28)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # output (N, 480, 14, 14)
        )
        self.b4_a = Inception(480, 192, (96, 208), (16, 48), 64)  # output (N, 512, 14, 14)
        if aux_logits:
            self.b4_a_aux = InceptionAux(512, num_class)
        self.b4_b = Inception(512, 160, (112, 224), (24, 64), 64)  # output (N, 512, 14, 14)
        self.b4_c = Inception(512, 128, (128, 256), (24, 64), 64)  # output (N, 512, 14, 14)
        self.b4_d = Inception(512, 112, (144, 288), (32, 64), 64)  # output (N, 528, 14, 14)
        if aux_logits:
            self.b4_d_aux = InceptionAux(528, num_class)
        self.b4_e = Inception(528, 256, (160, 320), (32, 128), 128)  # output (N, 832, 14, 14)
        self.max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # output (N, 832, 7, 7)
        self.b5 = nn.Sequential(
            Inception(832, 256, (160, 320), (32, 128), 128),  # output (N, 832, 7, 7)
            Inception(832, 384, (192, 384), (48, 128), 128),  # output (N, 1024, 7, 7)
            nn.AdaptiveAvgPool2d((1, 1)),  # adaptive average pooling to the given (H, W); each of the 1024 channel maps becomes 1x1: output (N, 1024, 1, 1)
            nn.Flatten(1)  # flatten from dim 1 onward: output (N, 1024)
        )
        self.fc = nn.Sequential(
            nn.Linear(1024, num_class),  # output (N, 10)
            nn.Softmax(dim=1)  # output (N, 10)
        )
        if init_weights:  # whether to initialize the weights
            self._initialize_weights()

    def forward(self, x):
        x = self.b1(x)
        x = self.b2(x)
        x = self.b3(x)
        x = self.b4_a(x)
        if self.training and self.aux_logits:
            aux1 = self.b4_a_aux(x)
        x = self.b4_b(x)
        x = self.b4_c(x)
        x = self.b4_d(x)
        if self.training and self.aux_logits:
            aux2 = self.b4_d_aux(x)
        x = self.b4_e(x)
        x = self.max(x)
        x = self.b5(x)
        x = self.fc(x)
        if self.training and self.aux_logits:
            return x, aux1, aux2
        return x

    # weight initialization
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, torch.nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, torch.nn.Linear):
                torch.nn.init.normal_(m.weight, 0, 0.01)
                torch.nn.init.constant_(m.bias, 0)
#------------------------------------------------Instantiate the model-------------------------------------------------------------#
net = GoogLeNet(10, True, True)
# for i in net.children():
#     print(i)
net.to(device)
print(net)
# X = torch.rand((256, 3, 224, 224), device=device)  # the InceptionAux branches must be commented out for this walk-through, otherwise it errors
# for blk in net.children():
#     X = blk(X)
#     print(blk.__class__.__name__, 'output.size()=', X.size())
# learning criterion: empirical risk minimization
loss_func = nn.CrossEntropyLoss()
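# note: nn.CrossEntropyLoss applies log-softmax internally, so it expects raw (unnormalized) logits as input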
# optimization algorithm: mini-batch gradient descent (Adam variant)
optimizer = optim.Adam(net.parameters(), lr=0.0005)
#---------------------------------------------------Training----------------------------------------------------------------#
gc.collect()  # garbage collection; worth calling when memory usage is high
num_epoch1 = 30
train_loss = []
train_accuracy = []
for epoch in np.arange(num_epoch1):
    torch.cuda.empty_cache()  # release cached GPU memory
    net.train()
    for name, parameters in net.named_parameters():
        print(name, ':', parameters)
    loss_sum = 0.0
    accuracy_sum = 0.0
    n = 0
    for id_iter, (feature, label) in enumerate(train_iter):
        feature, label = feature.to(device), label.to(device)
        optimizer.zero_grad()
        logits, aux_logits1, aux_logits2 = net(feature)
        output = 0.6 * logits + 0.2 * aux_logits1 + 0.2 * aux_logits2
        loss0 = loss_func(logits, label)
        loss1 = loss_func(aux_logits1, label)
        loss2 = loss_func(aux_logits2, label)
        loss = loss0 + 0.3 * loss1 + 0.3 * loss2
        loss.backward()
        optimizer.step()
        loss_sum += (loss * label.size(0)).item()
        accuracy = (output.argmax(dim=1) == label).sum().item()
        accuracy_sum += accuracy
        n += label.size(0)
        print("Epoch[%s/%s], step[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch1, id_iter, len(train_iter), loss.item(), accuracy / label.size(0)))
    train_loss.append(loss_sum / n)
    train_accuracy.append(accuracy_sum / n)
    print("Epoch[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch1, train_loss[epoch], train_accuracy[epoch]))
#----------------------------------------------------Save the model-------------------------------------------------------------#
if not os.path.exists("./googLeNet_Models"):
    os.mkdir("./googLeNet_Models")
torch.save(net, './googLeNet_Models/model.pth')  # save the whole model object
net = torch.load('./googLeNet_Models/model.pth', map_location=device)  # load it back
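# An alternative pattern worth noting (a sketch): save only the parameters via
# state_dict; this requires the GoogLeNet class definition to be available when
# loading, and the constructor arguments must match so the parameter keys line up.
# torch.save(net.state_dict(), './googLeNet_Models/model_state.pth')
# net = GoogLeNet(10, True, False)  # num_class=10, aux_logits=True, init_weights=False
# net.load_state_dict(torch.load('./googLeNet_Models/model_state.pth', map_location=device))
# net.to(device)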
#---------------------------------------------------Testing----------------------------------------------------------------#
num_epoch2 = 20
test_loss = []
test_accuracy = []
with torch.no_grad():
    for epoch in np.arange(num_epoch2):
        net.eval()
        for name, parameters in net.named_parameters():
            print(name, ':', parameters)
        loss_sum = 0.0
        accuracy_sum = 0.0
        n = 0
        for id_iter, (feature, label) in enumerate(test_iter):
            feature, label = feature.to(device), label.to(device)
            output = net(feature)
            loss = loss_func(output, label); print("output=%s\nlabel=%s" % (output, label))
            loss_sum += (loss * label.size(0)).item()
            accuracy = (output.argmax(dim=1) == label).sum().item(); print("accuracy=", accuracy)
            accuracy_sum += accuracy; print("accuracy_sum=", accuracy_sum)
            n += label.size(0); print("n=", n)
            print("Epoch[%s/%s], step[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch2, id_iter, len(test_iter), loss.item(), accuracy / label.size(0)))
        test_loss.append(loss_sum / n)
        test_accuracy.append(accuracy_sum / n)
        print("Epoch[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch2, test_loss[epoch], test_accuracy[epoch]))
#---------------------------------------------------Plotting----------------------------------------------------------#
fig = plt.figure(figsize=(16, 9))
fig.suptitle('Visualization')
x1 = np.arange(num_epoch1)
x2 = np.arange(num_epoch2)
ax = fig.add_subplot(1, 1, 1)
ax.set_title('googLeNet')
ax.set_xlabel('epoch')
ax.set_ylabel('loss_or_accuracy')
ax.plot(x1, train_loss, color='black', label='train_loss', marker='.')
ax.plot(x1, train_accuracy, color='black', label='train_accuracy', marker='*')
ax.plot(x2, test_loss, color='red', label='test_loss', marker='.')
ax.plot(x2, test_accuracy, color='red', label='test_accuracy', marker='*')
plt.legend()
plt.show()
I've been stuck on this for almost two weeks; any advice would be much appreciated.
Have you tried tuning the hyperparameters? If I have time tonight I'll run your code!
Original article link: GoogLeNet
To improve a network's performance, one can increase its depth and width, but this brings problems:
it introduces a large number of parameters;
deeper networks need more data and are prone to overfitting;
increasing depth also easily leads to vanishing gradients.