GoogLeNet implemented in PyTorch, trained on the CIFAR-10 dataset.
Training loss and accuracy both look normal. The test loss is also close to the training loss, but the test accuracy stays roughly at the level of a freshly initialized network, i.e. extremely low.
The results are shown in the figure below:
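One quick check I can think of (a sketch, using the net, device, and test_iter defined in the full code below): run the same test batch through the model in train mode and in eval mode and compare the outputs, to see whether some layer or branch behaves differently between the two modes:

feature, label = next(iter(test_iter))
feature = feature.to(device)
with torch.no_grad():
    net.train()
    out_train = net(feature)[0]  # in train mode forward() returns (logits, aux1, aux2)
    net.eval()
    out_eval = net(feature)  # in eval mode forward() returns only the main output
print(out_train[:2])
print(out_eval[:2])
net.train()  # restore training mode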
Training code:
num_epoch1 = 20
train_loss = []
train_accuracy = []
for epoch in np.arange(num_epoch1):
    torch.cuda.empty_cache()  # release cached GPU memory
    net.train()
    loss_sum = 0.0
    accuracy_sum = 0.0
    n = 0
    for id_iter, (feature, label) in enumerate(train_iter):
        feature, label = feature.to(device), label.to(device)
        optimizer.zero_grad()
        logits, aux_logits1, aux_logits2 = net(feature)
        output = 0.6 * logits + 0.2 * aux_logits1 + 0.2 * aux_logits2
        loss0 = loss_func(logits, label)
        loss1 = loss_func(aux_logits1, label)
        loss2 = loss_func(aux_logits2, label)
        loss = loss0 + 0.3 * loss1 + 0.3 * loss2
        loss.backward()
        optimizer.step()
        loss_sum += (loss * label.size(0)).item()
        accuracy = (output.argmax(dim=1) == label).sum().item()
        accuracy_sum += accuracy
        n += label.size(0)
        print("Epoch[%s/%s], step[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch1, id_iter, len(train_iter), loss.item(), accuracy / label.size(0)))
    train_loss.append(loss_sum / n)
    train_accuracy.append(accuracy_sum / n)
    print("Epoch[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch1, train_loss[epoch], train_accuracy[epoch]))
Test code:
num_epoch2 = 20
test_loss = []
test_accuracy = []
with torch.no_grad():
    for epoch in np.arange(num_epoch2):
        net.eval()
        loss_sum = 0.0
        accuracy_sum = 0.0
        n = 0
        for id_iter, (feature, label) in enumerate(test_iter):
            feature, label = feature.to(device), label.to(device)
            output = net(feature)
            loss = loss_func(output, label); print("output=%s\nlabel=%s" % (output, label))
            loss_sum += (loss * label.size(0)).item()
            accuracy = (output.argmax(dim=1) == label).sum().item(); print("accuracy=", accuracy)
            accuracy_sum += accuracy; print("accuracy_sum=", accuracy_sum)
            n += label.size(0); print("n=", n)
            print("Epoch[%s/%s], step[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch2, id_iter, len(test_iter), loss.item(), accuracy / label.size(0)))
        test_loss.append(loss_sum / n)
        test_accuracy.append(accuracy_sum / n)
        print("Epoch[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch2, test_loss[epoch], test_accuracy[epoch]))
Printing the test-set outputs shows something clearly wrong (all 64 rows of the batch are the same vector), but I can't tell where the problem is:
output=tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], device='cuda:0')
label=tensor([1, 6, 9, 6, 4, 0, 6, 1, 9, 5, 4, 8, 5, 3, 7, 2, 8, 1, 9, 0, 7, 8, 7, 5,
3, 3, 2, 7, 3, 2, 0, 3, 4, 5, 0, 8, 8, 3, 0, 3, 3, 4, 9, 3, 8, 1, 3, 1,
2, 5, 3, 4, 7, 0, 4, 0, 6, 1, 7, 1, 4, 0, 6, 2], device='cuda:0')
accuracy= 7
accuracy_sum= 7.0
n= 64
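Every row of output is an exact one-hot vector, which suggests that the scores feeding the final nn.Softmax are huge in magnitude. One way to inspect the raw scores before the Softmax (a sketch, relying on the GoogLeNet definition below, where self.fc = Sequential(Linear, Softmax), so net.fc[0] is the Linear layer):

def show_pre_softmax(module, inp, out):
    # forward hook: out is the Linear layer's output, i.e. the scores before Softmax
    print("pre-softmax scores:", out[:2])

handle = net.fc[0].register_forward_hook(show_pre_softmax)
with torch.no_grad():
    net.eval()
    feature, _ = next(iter(test_iter))
    net(feature.to(device))
handle.remove()  # detach the hook when done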
Full code:
import os
import gc
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.utils.data as Data
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
#----------------------------------------------------------------------------------------------------------------------#
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device=', device)
print(torch.cuda.is_available())
#-------------------------------------------------Data processing--------------------------------------------------------------#
train_trans = torchvision.transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(0.3),  # horizontally flip the PIL image with probability p
    transforms.RandomCrop(224),  # random crop to the given size (a no-op here, since the image is already 224x224)
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
test_trans = torchvision.transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
# CIFAR-10: images are (3, 32, 32), labels are 10 classes (0-9); 50000 training images and 10000 test images
train_datasets = torchvision.datasets.CIFAR10('./Datasets/CIFAR', train=True, download=True, transform=train_trans)
test_datasets = torchvision.datasets.CIFAR10('./Datasets/CIFAR', train=False, download=True, transform=test_trans)
# keep 5000 training images and 1000 test images
train_X = []
train_y = []
for i in range(5000):
    X = train_datasets[i][0]
    train_X.append(X)
    y = train_datasets[i][1]
    train_y.append(y)
train_datasets = [(X, y) for X, y in zip(train_X, train_y)]
test_X = []
test_y = []
for i in range(1000):
    X = test_datasets[i][0]
    test_X.append(X)
    y = test_datasets[i][1]
    test_y.append(y)
test_datasets = [(X, y) for X, y in zip(test_X, test_y)]
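# Equivalent subset selection via torch.utils.data.Subset (a sketch, as an alternative
# to the copy loops above). One behavioral difference: Subset keeps the dataset's
# transform, so the random augmentations above would be re-applied every epoch,
# whereas the copy loops freeze a single augmented version of each image.
# train_datasets = Data.Subset(train_datasets, range(5000))
# test_datasets = Data.Subset(test_datasets, range(1000))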
batch_size = 64
train_iter = Data.DataLoader(train_datasets, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = Data.DataLoader(test_datasets, batch_size=batch_size, shuffle=True, num_workers=0)
# check sample shapes
# for i, (X, y) in enumerate(train_iter):
#     print("i=%d | X.size()=%s, y.size()=%s" % (i, X.size(), y.size()))  # X.size()=torch.Size([64, 3, 224, 224]), y.size()=torch.Size([64])
#     print("i=%d | X=%s\n y=%s" % (i, X, y))
#------------------------------------------------Network definition---------------------------------------------------------------#
class Inception(nn.Module):
    def __init__(self, in_channels, c1, c2, c3, c4):  # c1-c4 are the output channel counts of the four branches; c2 and c3 are (reduce, out) tuples
        super(Inception, self).__init__()
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        self.p4_1 = nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(F.relu(self.p4_1(x))))
        return torch.cat([p1, p2, p3, p4], dim=1)  # concatenate along dim=1, i.e. the channel dimension
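# Quick shape check for a single Inception block (a sketch):
# blk = Inception(192, 64, (96, 128), (16, 32), 32)
# print(blk(torch.rand(2, 192, 28, 28)).size())  # torch.Size([2, 256, 28, 28]); 64+128+32+32 = 256 channels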
class InceptionAux(nn.Module):
    def __init__(self, in_channel, num_class):
        super(InceptionAux, self).__init__()
        self.avg_pool = nn.AvgPool2d(kernel_size=5, stride=3)  # output (N, 512, 4, 4) / (N, 528, 4, 4)
        self.conv = nn.Conv2d(in_channel, 128, kernel_size=1)  # output (N, 128, 4, 4)
        self.fc1 = nn.Linear(2048, 1024)  # 128 * 4 * 4 = 2048; output (N, 1024)
        self.fc2 = nn.Linear(1024, num_class)  # output (N, 10)

    def forward(self, x):
        x = self.avg_pool(x)
        x = self.conv(x)
        x = torch.flatten(x, 1)  # output (N, 2048)
        x = self.fc1(x)
        x = F.relu(x, inplace=True)
        x = self.fc2(x)
        return x
class GoogLeNet(nn.Module):
    def __init__(self, num_class, aux_logits=False, init_weights=False):  # aux_logits: whether to use the auxiliary classifiers (True for training, False for inference)
        super(GoogLeNet, self).__init__()
        self.aux_logits = aux_logits
        # input (N, 3, 224, 224)
        self.b1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),  # output (N, 64, 112, 112)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # output (N, 64, 56, 56)
        )
        self.b2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=1),  # output (N, 64, 56, 56)
            nn.ReLU(),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),  # output (N, 192, 56, 56)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # output (N, 192, 28, 28)
        )
        self.b3 = nn.Sequential(
            Inception(192, 64, (96, 128), (16, 32), 32),  # output (N, 256, 28, 28)
            Inception(256, 128, (128, 192), (32, 96), 64),  # output (N, 480, 28, 28)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # output (N, 480, 14, 14)
        )
        self.b4_a = Inception(480, 192, (96, 208), (16, 48), 64)  # output (N, 512, 14, 14)
        if aux_logits:
            self.b4_a_aux = InceptionAux(512, num_class)
        self.b4_b = Inception(512, 160, (112, 224), (24, 64), 64)  # output (N, 512, 14, 14)
        self.b4_c = Inception(512, 128, (128, 256), (24, 64), 64)  # output (N, 512, 14, 14)
        self.b4_d = Inception(512, 112, (144, 288), (32, 64), 64)  # output (N, 528, 14, 14)
        if aux_logits:
            self.b4_d_aux = InceptionAux(528, num_class)
        self.b4_e = Inception(528, 256, (160, 320), (32, 128), 128)  # output (N, 832, 14, 14)
        self.max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # output (N, 832, 7, 7)
        self.b5 = nn.Sequential(
            Inception(832, 256, (160, 320), (32, 128), 128),  # output (N, 832, 7, 7)
            Inception(832, 384, (192, 384), (48, 128), 128),  # output (N, 1024, 7, 7)
            nn.AdaptiveAvgPool2d((1, 1)),  # adaptive average pooling to the given (H, W); each of the 1024 channel maps becomes 1x1: output (N, 1024, 1, 1)
            nn.Flatten(1)  # flatten from dim 1 onward: output (N, 1024)
        )
        self.fc = nn.Sequential(
            nn.Linear(1024, num_class),  # output (N, 10)
            nn.Softmax(dim=1)  # output (N, 10)
        )
        if init_weights:  # whether to initialize the weights
            self._initialize_weights()

    def forward(self, x):
        x = self.b1(x)
        x = self.b2(x)
        x = self.b3(x)
        x = self.b4_a(x)
        if self.training and self.aux_logits:
            aux1 = self.b4_a_aux(x)
        x = self.b4_b(x)
        x = self.b4_c(x)
        x = self.b4_d(x)
        if self.training and self.aux_logits:
            aux2 = self.b4_d_aux(x)
        x = self.b4_e(x)
        x = self.max(x)
        x = self.b5(x)
        x = self.fc(x)
        if self.training and self.aux_logits:
            return x, aux1, aux2
        return x

    # weight initialization
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, torch.nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, torch.nn.Linear):
                torch.nn.init.normal_(m.weight, 0, 0.01)
                torch.nn.init.constant_(m.bias, 0)
#------------------------------------------------Instantiate the model-------------------------------------------------------------#
net = GoogLeNet(10, True, True)
# for i in net.children():
#     print(i)
net.to(device)
print(net)
# X = torch.rand((256, 3, 224, 224), device=device)  # the InceptionAux branches must be commented out for this walk-through, otherwise it errors
# for blk in net.children():
#     X = blk(X)
#     print(blk.__class__.__name__, 'output.size()=', X.size())
# learning criterion: empirical risk minimization
loss_func = nn.CrossEntropyLoss()
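# note: nn.CrossEntropyLoss applies log-softmax internally, so it expects raw (unnormalized) logits as input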
# optimization algorithm: mini-batch gradient descent (Adam variant)
optimizer = optim.Adam(net.parameters(), lr=0.0005)
#---------------------------------------------------Training----------------------------------------------------------------#
gc.collect()  # garbage collection; worth calling when memory usage is high
num_epoch1 = 30
train_loss = []
train_accuracy = []
for epoch in np.arange(num_epoch1):
    torch.cuda.empty_cache()  # release cached GPU memory
    net.train()
    for name, parameters in net.named_parameters():
        print(name, ':', parameters)
    loss_sum = 0.0
    accuracy_sum = 0.0
    n = 0
    for id_iter, (feature, label) in enumerate(train_iter):
        feature, label = feature.to(device), label.to(device)
        optimizer.zero_grad()
        logits, aux_logits1, aux_logits2 = net(feature)
        output = 0.6 * logits + 0.2 * aux_logits1 + 0.2 * aux_logits2
        loss0 = loss_func(logits, label)
        loss1 = loss_func(aux_logits1, label)
        loss2 = loss_func(aux_logits2, label)
        loss = loss0 + 0.3 * loss1 + 0.3 * loss2
        loss.backward()
        optimizer.step()
        loss_sum += (loss * label.size(0)).item()
        accuracy = (output.argmax(dim=1) == label).sum().item()
        accuracy_sum += accuracy
        n += label.size(0)
        print("Epoch[%s/%s], step[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch1, id_iter, len(train_iter), loss.item(), accuracy / label.size(0)))
    train_loss.append(loss_sum / n)
    train_accuracy.append(accuracy_sum / n)
    print("Epoch[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch1, train_loss[epoch], train_accuracy[epoch]))
#----------------------------------------------------Save the model-------------------------------------------------------------#
if not os.path.exists("./googLeNet_Models"):
    os.mkdir("./googLeNet_Models")
torch.save(net, './googLeNet_Models/model.pth')  # save the whole model object
net = torch.load('./googLeNet_Models/model.pth', map_location=device)  # load it back
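# An alternative pattern worth noting (a sketch): save only the parameters via
# state_dict; this requires the GoogLeNet class definition to be available when
# loading, and the constructor arguments must match so the parameter keys line up.
# torch.save(net.state_dict(), './googLeNet_Models/model_state.pth')
# net = GoogLeNet(10, True, False)  # num_class=10, aux_logits=True, init_weights=False
# net.load_state_dict(torch.load('./googLeNet_Models/model_state.pth', map_location=device))
# net.to(device)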
#---------------------------------------------------Testing----------------------------------------------------------------#
num_epoch2 = 20
test_loss = []
test_accuracy = []
with torch.no_grad():
    for epoch in np.arange(num_epoch2):
        net.eval()
        for name, parameters in net.named_parameters():
            print(name, ':', parameters)
        loss_sum = 0.0
        accuracy_sum = 0.0
        n = 0
        for id_iter, (feature, label) in enumerate(test_iter):
            feature, label = feature.to(device), label.to(device)
            output = net(feature)
            loss = loss_func(output, label); print("output=%s\nlabel=%s" % (output, label))
            loss_sum += (loss * label.size(0)).item()
            accuracy = (output.argmax(dim=1) == label).sum().item(); print("accuracy=", accuracy)
            accuracy_sum += accuracy; print("accuracy_sum=", accuracy_sum)
            n += label.size(0); print("n=", n)
            print("Epoch[%s/%s], step[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch2, id_iter, len(test_iter), loss.item(), accuracy / label.size(0)))
        test_loss.append(loss_sum / n)
        test_accuracy.append(accuracy_sum / n)
        print("Epoch[%s/%s] | loss=%s, accuracy=%s" % (epoch + 1, num_epoch2, test_loss[epoch], test_accuracy[epoch]))
#---------------------------------------------------Plotting----------------------------------------------------------#
fig = plt.figure(figsize=(16, 9))
fig.suptitle('Visualization')
x1 = np.arange(num_epoch1)
x2 = np.arange(num_epoch2)
ax = fig.add_subplot(1, 1, 1)
ax.set_title('googLeNet')
ax.set_xlabel('epoch')
ax.set_ylabel('loss_or_accuracy')
ax.plot(x1, train_loss, color='black', label='train_loss', marker='.')
ax.plot(x1, train_accuracy, color='black', label='train_accuracy', marker='*')
ax.plot(x2, test_loss, color='red', label='test_loss', marker='.')
ax.plot(x2, test_accuracy, color='red', label='test_accuracy', marker='*')
plt.legend()
plt.show()
I've been stuck on this for almost two weeks; any advice would be much appreciated.
Have you tried tuning the hyperparameters? If I have time tonight I'll run your code!
Original article link: GoogLeNet
To improve a network's performance, one can increase its depth and width, but this brings problems:
it introduces a large number of parameters;
deeper networks need more data and are prone to overfitting;
increasing depth also easily leads to vanishing gradients.