网络在测试集上测试时误差和准确率一直不变

在用PyTorch进行LeNet网络测试,使用的数据集是MNIST。训练集上误差是下降的,但在测试集上误差和准确率一直不变,这是什么原因导致的?

import torch
from torch import nn
from LeNet5 import LeNet5
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): nothing in this script trains the model or loads saved
# weights, so unless LeNet5() restores a checkpoint internally (its definition
# is not visible here) every evaluation pass below scores the same untrained
# network. Load the trained parameters at this point, e.g.
# model.load_state_dict(torch.load(<checkpoint path>)) — confirm the path
# against the training script.
model = LeNet5().to(device)
print(model)

# MNIST test split, downloaded to ./data on first use and converted to tensors.
test_data = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=ToTensor())

batch_size = 256
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

loss_fn = nn.CrossEntropyLoss().to(device)
# Number of samples in the test split; used to normalise the metrics.
size = len(test_dataloader.dataset)
# Interpolate the value into the message. Passing it as a second print()
# argument would emit the literal "%d" followed by the number.
print("size=%d" % size)

def test(epoch):
    """Evaluate the module-level ``model`` on ``test_dataloader`` and print the metrics.

    Prints the accuracy (in percent) and the per-sample average loss over
    every batch yielded by the loader.

    Args:
        epoch: Index of the evaluation pass. Accepted so callers can pass
            their loop counter; it does not influence the computation.
    """
    # Switch the model to evaluation mode.
    model.eval()
    loss_total = 0.0
    correct = 0.0
    seen = 0
    # Inference only: no gradients are needed.
    with torch.no_grad():
        for x, y in test_dataloader:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            loss = loss_fn(pred, y)
            n_batch = y.size(0)
            # Tensor.item() returns the value of a one-element tensor as a
            # plain Python number. loss_fn is built with the default mean
            # reduction, so the batch mean is weighted by the batch size to
            # recover the summed loss before the final per-sample division.
            loss_total += loss.item() * n_batch
            # Count predictions whose highest-scoring class matches the label.
            correct += (pred.argmax(dim=1) == y).sum().item()
            seen += n_batch
    # Per-sample averages; max() avoids a division by zero on an empty loader.
    denom = max(seen, 1)
    test_loss = loss_total / denom
    correct /= denom
    print(f'Test Error: Accuracy: {(100 * correct):}%, Average loss: {test_loss:}\n')

if __name__ == '__main__':
    # NOTE(review): nothing between iterations changes the model (test() only
    # runs inference), so each of the ten passes evaluates the same network on
    # the same data and is expected to print the same numbers.
    for epoch in range(10):
        test(epoch)

img

换个代码实验CNN与MNIST数据集吧

下面代码是我写的,亲测可用。你可以改成torch自带的LeNet模型

import torch
import torchvision
from torch.autograd import Variable
import torch.utils.data.dataloader as Data
 
# MNIST training split; downloaded into ./mnist when it is not already there.
train_data = torchvision.datasets.MNIST(
    './mnist', train=True, transform=torchvision.transforms.ToTensor(), download=True
)
# MNIST test split, read from the same root directory.
test_data = torchvision.datasets.MNIST(
    './mnist', train=False, transform=torchvision.transforms.ToTensor()
)
# Use the current `data` / `targets` attributes; `train_data`, `train_labels`
# and `test_data` are deprecated aliases of them.
print("train_data:", train_data.data.size())
print("train_labels:", train_data.targets.size())
print("test_data:", test_data.data.size())

# Shuffle only the training batches; the evaluation order does not matter.
train_loader = Data.DataLoader(dataset=train_data, batch_size=256, shuffle=True)
test_loader = Data.DataLoader(dataset=test_data, batch_size=256)
 
 
class Net(torch.nn.Module):
    """Small convolutional classifier.

    Three conv -> ReLU -> 2x2 max-pool stages (1 -> 32 -> 64 -> 64 channels),
    followed by global average pooling and a two-layer fully connected head
    that produces 10 output scores per sample.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 3x3 conv (stride 1, padding 1) + ReLU + 2x2 max-pool stage."""
        return torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, out_channels, 3, 1, 1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()
        # Stages are created in the same order as they are applied in forward().
        self.conv1 = self._conv_stage(1, 32)
        self.conv2 = self._conv_stage(32, 64)
        self.conv3 = self._conv_stage(64, 64)
        # Classifier head: 64 pooled features -> 32 hidden units -> 10 scores.
        self.dense = torch.nn.Sequential(
            torch.nn.Linear(64, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 10),
        )

    def forward(self, x):
        """Return the 10 class scores for each sample in the batch ``x``."""
        features = self.conv3(self.conv2(self.conv1(x)))
        # Adaptive pooling to a 1x1 map, i.e. global average pooling per channel.
        pooled = torch.nn.functional.adaptive_avg_pool2d(features, (1, 1))
        # Flatten to (batch, channels) for the linear layers.
        flat = pooled.view(pooled.size(0), -1)
        return self.dense(flat)
 
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Build the network and move its parameters to the chosen device.
model = Net()
model = model.to(device)
print(model)
# Cross-entropy objective, optimised with Adam at its default settings.
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
 
 
def train():
    """Run one training epoch over ``train_loader`` and print the epoch metrics.

    Updates the module-level ``model`` in place through ``optimizer`` and
    prints the per-sample average loss and the accuracy for the epoch.
    """
    # eval() leaves the model in evaluation mode, so switch back to training
    # mode at the start of every epoch.
    model.train()
    loss_total = 0.
    correct_total = 0
    seen = 0
    for batch_x, batch_y in train_loader:
        # Tensors can be moved directly; the Variable wrapper is a no-op.
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()  # clear gradients from the previous step
        out = model(batch_x)  # forward pass
        loss = loss_func(out, batch_y)  # batch-mean cross-entropy
        loss.backward()  # back-propagation
        optimizer.step()  # parameter update

        # ------ accumulate loss and accuracy
        n_batch = batch_y.size(0)
        # loss_func uses the default mean reduction, so weight the batch mean
        # by the batch size to get a true per-sample average at the end.
        loss_total += loss.item() * n_batch
        # Index of the highest score along dim 1 is the predicted class.
        pred = out.argmax(dim=1)
        correct_total += (pred == batch_y).sum().item()
        seen += n_batch
    # max() avoids a division by zero when the loader yields no batches.
    denom = max(seen, 1)
    print('Train Loss: {:.6f}, Acc: {:.6f}'.format(
        loss_total / denom, correct_total / denom))
 
 
def eval():
    """Evaluate the module-level ``model`` on ``test_loader`` and print the metrics.

    Prints the per-sample average loss and the accuracy over the test loader.

    NOTE(review): this name shadows the built-in ``eval``; it is kept because
    the script calls it by this name.
    """
    model.eval()
    loss_total = 0.
    correct_total = 0
    seen = 0
    # Gradients are not needed for evaluation; disable them for the whole loop.
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            # Tensors can be moved directly; the Variable wrapper is a no-op.
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            out = model(batch_x)
            loss = loss_func(out, batch_y)
            n_batch = batch_y.size(0)
            # loss_func uses the default mean reduction, so weight the batch
            # mean by the batch size to get a true per-sample average.
            loss_total += loss.item() * n_batch
            # Index of the highest score along dim 1 is the predicted class.
            pred = out.argmax(dim=1)
            correct_total += (pred == batch_y).sum().item()
            seen += n_batch
    # max() avoids a division by zero when the loader yields no batches.
    denom = max(seen, 1)
    print('Test Loss: {:.6f}, Acc: {:.6f}'.format(
        loss_total / denom, correct_total / denom))
 
 
# Ten epochs: each one trains on the full training set, then reports test metrics.
for epoch in range(10):
    # Epochs are reported 1-based.
    print(f'epoch {epoch + 1}')
    train()  # training pass
    eval()  # evaluation pass