用pytorch写了图像分类网络,但是模型训练的时候loss固定,acc固定

用自己做的数据集做一个图像分类任务,但是训练的时候模型的loss和acc一直不变

问题相关代码,请勿粘贴截图

class dp_cnn_torch(object):
def init(self, train_loader, test_loader):
self.train_loader = train_loader
self.test_loader = test_loader
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    self.model = self.dp_cnn_model()
    optimizer = torch.optim.Adam(self.model.parameters(), lr=0.05)

    epoch = 0
    best_acc = 0
    bad_count = 0
    best_epoch = 0
    patient_count = 10
    privacy_model_save_path = './trained_privacy_model/dp_cnn_torch.tar'
    while True:
        # for epoch in range(100):
        self.dp_cnn_train(self.model, self.train_loader, optimizer, epoch, self.device, delta=1e-5)
        current_acc = self.dp_cnn_test(self.model, self.test_loader, epoch, self.device)
        # early stop
        if current_acc > best_acc:
            best_acc = current_acc
            bad_count = 0
            best_epoch = epoch
            torch.save(self.model, privacy_model_save_path)
        else:
            bad_count += 1

        if bad_count != 0:
            print("The last {0} epochs have not shown improvements on the validation set.".format(bad_count))
        if bad_count > patient_count:
            if best_acc < 0.90:
                continue
            else:
                print("Early Stop!")
                print("best_acc:{}, best_epoch:{}".format(best_acc, best_epoch))
                print("dp_model finished!")
                break

        epoch += 1

def dp_cnn_model(self):
    model = torch.nn.Sequential(
        torch.nn.Conv2d(3, 16, kernel_size=8, stride=2, padding=3),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(2, 1),
        torch.nn.Conv2d(16, 32, 4, 2),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(2, 1),
        torch.nn.Flatten(),
        torch.nn.Linear(32 * 4 * 4, 32),
        torch.nn.Sigmoid(),
        torch.nn.Linear(32, 2)
    )

    print(model)

    return model


def dp_cnn_train(self, model, train_loader, optimizer, epoch, device, delta):
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    losses = []
    for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(delta)

    print(
        f"Train Epoch:{epoch} \t"
        f"Loss:{np.mean(losses):.6f}"
        f"(epsilon = {epsilon:.2f}, delta={delta} for alpha = {best_alpha})"
    )


def dp_cnn_test(self, model, test_loader, epoch, device):
    model.eval()
    with torch.no_grad():
        # test
        total_correct = 0
        total_num = 0
        label_error = 0
        label_zero = 0
        label_one = 0
        label_two = 0
        for batch_idx, (x, label) in enumerate(test_loader):
            # [b,3,32,32]
            # [b]
            x, label = x.to(device), label.to(device)

            # [b,10]
            logits = model(x)
            # [b]
            pred = logits.argmax(dim=1)
            # [b] vs [b] => scalar tensor
            if torch.eq(pred, label).float().sum().item():
                pass
            else:
                label_error += 1
                if label.item() == 0:
                    label_zero += 1
                elif label.item() == 1:
                    label_one += 1
                else:
                    label_two += 1
            total_correct += torch.eq(pred, label).float().sum().item()
            total_num += x.size(0)

        acc = total_correct / total_num
        print(
            f"Test Epoch:{epoch} \t"
            f"accuracy:{acc:.6f} \n"
            f"label_error:{label_error} \t"
            f"label_zero:{label_zero} \t"
            f"label_one:{label_one} \t"
            f"label_two:{label_two} \t"
        )
        return acc

if name == 'main':
dp_train_path = 'privacy_data/raw_data/dp_train.txt'
dp_test_path = 'privacy_data/raw_data/dp_test.txt'

dp_train_data = LoadData(dp_train_path, True)
dp_test_data = LoadData(dp_test_path, False)

dp_train_loader = torch.utils.data.DataLoader(dataset=dp_train_data,
                                              batch_size=8,
                                              shuffle=True)
dp_test_loader = torch.utils.data.DataLoader(dataset=dp_test_data,
                                             batch_size=1,
                                             shuffle=True)
dp_cnn_torch(dp_train_loader, dp_test_loader)
运行结果及报错内容

img

我的解答思路和尝试过的方法

一开始检查可能是标签与图像不匹配,但是检查之后发现还是这样;
又检查了各类数据数量是否均衡,处理完之后仍没有改善;
再次检查后发现模型只能识别一类,其他类识别均错误。改成二分类也只能识别一个类,好像把所有图像都归到了一个类里面

我想要达到的结果

求各位指点。

你先调整一下学习率,看看loss和acc有没有改动
如果没有的话,就是模型和数据的问题

img


你这里全连接后输出就是二分类,二分类就只能识别是否,也就是是这个物体和不是这个物体。

模型合并的时候出现的问题

loss不变,acc不变,那就是训练完成了。acc不高的可能性是,网络不适用于此类数据集的分类,更换别的模型看看,或者说两种类别的特征差别很小,网络没有办法区分开来。

看了一下你的模型代码和结果,发现你只训练了3个epoch就结束,明显是不正确的。
建议修改训练的代码,先指定epoch,不要使用最后两轮没改变就结束训练这种方法,因为深度学习训练很多时候是随机的,一旦陷入一个梯度没超过,结果就是不变。
dp_cnn_torch里面这段代码修改为:

    for epoch in range(100):
        self.dp_cnn_train(self.model, self.train_loader, optimizer, epoch, self.device, delta=1e-5)
        current_acc = self.dp_cnn_test(self.model, self.test_loader, epoch, self.device)
        # early stop
        if current_acc > best_acc:
            best_acc = current_acc
            bad_count = 0
            best_epoch = epoch
            torch.save(self.model, privacy_model_save_path)
 
        if best_acc   < 0.90:
            continue
        else:
            print("Early Stop!")
            print("best_acc:{}, best_epoch:{}".format(best_acc, best_epoch))
            print("dp_model finished!")
 

你的这个很可能是过拟合了,你的数据集会不会太少了,或者特征不够,我以前做过一个损失和精度直接不变,后面发现只有3个特征,增加特征数量就解决了

几种可能性:学习率太高,数据集太少,这些都会导致过拟合。还有可能你计算错了