模型训练的准确率一直只有百分之五十多,该怎么调参数?

初学者,拿网上的代码跑自己的数据集,使用ResNet50预训练模型,调过学习率、batch_size、img_size等参数,也换过优化器,不过训练效果都差不多,还有什么办法提高准确率呢?

img

以下是训练代码:


import time
import torch
from torch import nn
from torch.utils.data import DataLoader
from PIL import Image
import torchvision.transforms as transforms
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torchvision.models import resnet50  # ResNet系列
from torchvision import models
from torch.utils.data import Dataset

# Standardisation step applied to image tensors after ToTensor().
# The per-channel statistics below depend on the dataset being used.
transform_BZ = transforms.Normalize(
    [0.44660836, 0.30800995, 0.21277349],  # per-channel mean
    [0.18726136, 0.13087553, 0.09021662],  # per-channel std
)


class LoadData(Dataset):
    """Image-classification dataset driven by a tab-separated index file.

    Every non-blank line of the index file holds an image path and an
    integer class label separated by a tab character. Each image is
    converted to RGB, letterboxed onto a square canvas and then passed
    through either the training or the validation transform pipeline.

    Args:
        txt_path: Path of the UTF-8 encoded index file.
        train_flag: If true, use the training pipeline (random flips);
            otherwise use the deterministic validation pipeline.
        img_size: Side length in pixels of the square canvas. Defaults to
            512, the value that used to be hard-coded.
    """

    def __init__(self, txt_path, train_flag=True, img_size=512):
        self.imgs_info = self.get_images(txt_path)
        self.train_flag = train_flag
        self.img_size = img_size

        # Training pipeline: resize, random horizontal/vertical flips,
        # conversion to a tensor, then standardisation.
        self.train_tf = transforms.Compose([
            transforms.Resize(self.img_size),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transform_BZ,
        ])
        # Validation pipeline: same as above without the random flips.
        self.val_tf = transforms.Compose([
            transforms.Resize(self.img_size),
            transforms.ToTensor(),
            transform_BZ,
        ])

    def get_images(self, txt_path):
        """Read the index file and return a list of split records.

        Each record is the list obtained by stripping a line and splitting
        it on tabs (normally ``[path, label]``). Blank or whitespace-only
        lines are skipped so that a trailing newline in the file does not
        produce an unusable ``['']`` entry.
        """
        with open(txt_path, 'r', encoding='utf-8') as f:
            return [line.strip().split('\t') for line in f if line.strip()]

    def padding_black(self, img):
        """Letterbox ``img`` onto an ``img_size`` x ``img_size`` canvas.

        The image is rescaled (up or down) so that its longer side equals
        ``img_size`` while keeping the aspect ratio, and is then pasted in
        the centre of a new RGB canvas.
        """
        w, h = img.size
        scale = self.img_size / max(w, h)
        # Clamp to one pixel: truncation could otherwise give a zero-sized
        # side for extremely elongated images, which resize() rejects.
        fg_w = max(1, int(w * scale))
        fg_h = max(1, int(h * scale))
        img_fg = img.resize((fg_w, fg_h))

        size_bg = self.img_size
        img_bg = Image.new("RGB", (size_bg, size_bg))
        img_bg.paste(img_fg, ((size_bg - fg_w) // 2, (size_bg - fg_h) // 2))
        return img_bg

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the record at ``index``."""
        img_path, label = self.imgs_info[index]
        # The context manager releases the file handle as soon as the pixel
        # data has been copied by convert(), instead of waiting for garbage
        # collection (matters with many DataLoader workers).
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')
        img = self.padding_black(img)
        pipeline = self.train_tf if self.train_flag else self.val_tf
        return pipeline(img), int(label)

    def __len__(self):
        """Return the number of records read from the index file."""
        return len(self.imgs_info)

def WriteData(fname, *args):
    """Append one record to the text file ``fname``.

    Every positional argument is converted with ``str`` and followed by a
    tab character; the record is terminated by a newline.
    """
    record = "".join(str(item) + "\t" for item in args)
    with open(fname, 'a+') as log_file:
        log_file.write(record + "\n")


def train(dataloader, model, loss_fn, optimizer, device):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        dataloader: Loader yielding ``(X, y)`` batches; must expose a
            ``dataset`` attribute with a length (used for progress output).
        model: Network to optimise.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimiser bound to ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A 0-d NumPy array holding the loss averaged over the batches of
        this epoch.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    size = len(dataloader.dataset)
    # validate() switches the model to eval mode and nothing else switches
    # it back, so without this call every epoch after the first would
    # train with BatchNorm/Dropout in inference mode.
    model.train()

    running_loss = 0.0
    num_batches = 0
    # ``batch`` is the batch index (not the batch size).
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)

        # Back-propagate and update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain float: adding the tensor itself would keep
        # every batch's autograd graph alive until the end of the epoch.
        loss_value = loss.item()
        running_loss += loss_value
        num_batches += 1

        # Progress report every 10 batches.
        if batch % 10 == 0:
            current = batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

    if num_batches == 0:
        raise ValueError("dataloader yielded no batches; cannot average the loss")

    # Each accumulated value is already a per-batch loss, so the epoch
    # average divides by the number of batches, not the number of samples.
    # Returned as a 0-d NumPy array to stay compatible with existing callers.
    return torch.tensor(running_loss / num_batches).numpy()


def validate(dataloader, model, loss_fn, device):
    """Evaluate ``model`` on ``dataloader`` and report accuracy and loss.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        dataloader: Loader yielding ``(X, y)`` batches; must expose a
            ``dataset`` attribute with a length.
        model: Network to evaluate; its output is arg-maxed over dim 1 and
            compared with ``y``.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, avg_loss)``: the fraction of correctly
        classified samples and the loss averaged over the batches.

    Raises:
        ValueError: If the dataset is empty or no batch is produced.
    """
    size = len(dataloader.dataset)
    model.eval()

    test_loss, correct = 0.0, 0
    num_batches = 0
    # No parameter update happens here, so gradient tracking is disabled.
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            # Number of samples whose top-scoring class equals the label.
            correct += (pred.argmax(1) == y).sum().item()
            num_batches += 1

    if size == 0 or num_batches == 0:
        raise ValueError("dataloader is empty; nothing to validate")

    # The accumulated values are per-batch losses, so average over the
    # batches; accuracy is a per-sample quantity and divides by ``size``.
    test_loss /= num_batches
    correct /= size
    print(f"correct = {correct}, Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct, test_loss


if __name__=='__main__':
    # Fine-tune a 5-class ResNet-50 from a saved checkpoint, logging the
    # metrics of every epoch and saving periodic/last/best checkpoints.
    batch_size = 16

    # One dataset + loader each for the training and validation splits.
    train_data = LoadData("/content/drive/MyDrive/data/train2500.txt", True)
    valid_data = LoadData("/content/drive/MyDrive/data/test2500.txt", False)

    train_dataloader = DataLoader(dataset=train_data, num_workers=4, pin_memory=True, batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(dataset=valid_data, num_workers=4, pin_memory=True, batch_size=batch_size)

    # Train on the GPU when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")

    # Build the network, restore the checkpoint (loaded onto the CPU),
    # re-initialise the classifier weights, then move everything to the
    # target device exactly once.
    finetune_net = resnet50(num_classes=5)
    state_dict = torch.load("/content/drive/MyDrive/data/resnet50_best.pth", map_location='cpu')
    finetune_net.load_state_dict(state_dict)
    nn.init.xavier_normal_(finetune_net.fc.weight)
    finetune_net = finetune_net.to(device)

    # Split the parameters: the final fully connected layer gets a 10x
    # learning rate, everything else uses the base rate.
    head_names = {"fc.weight", "fc.bias"}
    parms_1x, parms_10x = [], []
    for name, value in finetune_net.named_parameters():
        if name in head_names:
            parms_10x.append(value)
        else:
            parms_1x.append(value)

    # Cross-entropy classification loss.
    loss_fn = nn.CrossEntropyLoss()

    # Adam optimiser with two parameter groups.
    learning_rate = 1e-3
    optimizer = torch.optim.Adam([
        {
            'params': parms_1x
        },
        {
            'params': parms_10x,
            'lr': learning_rate * 10
        }], lr=learning_rate)

    epochs = 3
    # Start from +inf so the first epoch always produces a "best" checkpoint.
    best_val_loss = float("inf")
    save_root = "/content/drive/MyDrive/data/"

    for t in range(epochs):
        print(f"Epoch {t + 1}\n-------------------------------")
        time_start = time.time()
        avg_loss = train(train_dataloader, finetune_net, loss_fn, optimizer, device)
        time_end = time.time()
        print(f"train time: {(time_end - time_start)}")

        val_accuracy, val_loss = validate(valid_dataloader, finetune_net, loss_fn, device)
        # Append this epoch's metrics to the log file.
        WriteData(save_root + "resnet50_3.txt",
                  "epoch", t,
                  "train_loss", avg_loss,
                  "val_loss", val_loss,
                  "val_accuracy", val_accuracy)

        # Periodic snapshot every 5 epochs, plus an always-overwritten "last".
        if t % 5 == 0:
            torch.save(finetune_net.state_dict(), save_root + "resnet50_3_epoch" + str(t) + "_loss_" + str(avg_loss) + ".pth")
        torch.save(finetune_net.state_dict(), save_root + "resnet50_3_last.pth")

        # Pick the best checkpoint on held-out data: the training loss keeps
        # falling as the model overfits, so it cannot identify the model
        # that generalises best.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(finetune_net.state_dict(), save_root + "resnet50_3_best.pth")

尝试增加数据集数量或者做数据增强,如果背景太复杂多样化会影响准确率的。

不知道你这个问题是否已经解决, 如果还没有解决的话:
  • 以下回答来自chatgpt:

    我可以给您一些指导意见来帮助您提高训练效果:

    1. 数据预处理

    在使用预训练模型时,数据预处理非常重要。一般来说,您需要按照预训练模型的预处理方式处理数据。以ResNet50为例,通常需要将输入图像缩放到224x224大小,并使用ImageNet的均值和标准差进行归一化。如果您没有这样做,输入数据的分布会与预训练权重不匹配,可能导致预训练特征无法发挥作用、准确率偏低等问题。

    下面是一个数据预处理的示例代码:

    # `tf` is referenced below, so TensorFlow itself must be imported too.
    import tensorflow as tf
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    
    # NOTE: `train_directory` and `batch_size` are placeholders that must be
    # defined before running this example.
    
    # Apply the ResNet50 preprocessing function and reserve 20% of the
    # images for validation.
    data_generator = ImageDataGenerator(
        preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
        validation_split=0.2)
    
    # Training subset: shuffled, one-hot labels.
    train_generator = data_generator.flow_from_directory(
        train_directory,
        target_size=(224, 224),
        batch_size=batch_size,
        shuffle=True,
        seed=42,
        class_mode='categorical',
        subset='training')
    
    # Validation subset: same directory, not shuffled.
    validation_generator = data_generator.flow_from_directory(
        train_directory,
        target_size=(224, 224),
        batch_size=batch_size,
        shuffle=False,
        seed=42,
        class_mode='categorical',
        subset='validation')
    
    2. 数据增强

    数据增强是提高模型泛化能力和减轻过拟合的有效方式。您可以使用ImageDataGenerator来增加数据量。常用的数据增强操作有:随机旋转、翻转、平移、缩放等。

    下面是一个简单的数据增强示例代码:

    import tensorflow as tf
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    
    # NOTE: `train_directory` and `batch_size` are placeholders that must be
    # defined before running this example.
    
    # Augmenting generator, used for the training subset only.
    data_generator = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
        validation_split=0.2)
    
    # Validation images must not be randomly distorted, otherwise the
    # validation metrics become noisy. Use a second generator with the same
    # preprocessing and the same validation_split but no augmentation.
    validation_data_generator = ImageDataGenerator(
        preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
        validation_split=0.2)
    
    train_generator = data_generator.flow_from_directory(
        train_directory,
        target_size=(224, 224),
        batch_size=batch_size,
        shuffle=True,
        seed=42,
        class_mode='categorical',
        subset='training')
    
    validation_generator = validation_data_generator.flow_from_directory(
        train_directory,
        target_size=(224, 224),
        batch_size=batch_size,
        shuffle=False,
        seed=42,
        class_mode='categorical',
        subset='validation')
    
    3. Fine-tuning

    Fine-tuning通常先冻结预训练模型的卷积层,仅训练新添加的全连接层。一般来说,越靠近输入数据的层表示的特征越简单,越靠近输出层的层表示的特征越复杂。如果您的学习率设置过大,可能会破坏预训练得到的特征,因此建议使用较小的学习率,并逐步解冻更多的卷积层。

    下面是一个Fine-tuning的示例代码:

    import tensorflow as tf
    
    # NOTE: `num_classes`, `train_generator` and `validation_generator` are
    # placeholders that must be defined before running this example.
    
    # Load the ResNet50 model without the top layers and freeze them
    base_model = tf.keras.applications.ResNet50(
        include_top=False, weights='imagenet')
    
    for layer in base_model.layers:
        layer.trainable = False
    
    # Add a global average pooling layer and a dense output layer
    x = base_model.output
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(num_classes, activation='softmax')(x)
    
    # Create a new model
    model = tf.keras.models.Model(inputs=base_model.input, outputs=x)
    
    # Callback used by both training stages: lower the learning rate when
    # the validation loss stops improving.
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=2, min_lr=1e-7)
    
    # Stage 1: train only the new classification head (backbone frozen)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    model.fit(train_generator,
              epochs=5,
              validation_data=validation_generator,
              callbacks=[reduce_lr])
    
    # Stage 2: unfreeze the backbone and retrain with a smaller learning rate
    for layer in base_model.layers:
        layer.trainable = True
    
    # Re-compile so that the change of `trainable` takes effect
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    model.fit(train_generator,
              epochs=10,
              validation_data=validation_generator,
              callbacks=[reduce_lr])
    
    4. 尝试其他预训练模型

    如果您无法提高Resnet50的准确率,可以尝试使用其他更适合您数据集的预训练模型,如InceptionV3, MobileNet等。不同的模型适用于不同的任务,因此您可能需要根据自己的数据和任务选择适合的模型。

    5. 进行超参数搜索

    您可以使用KerasTuner库(Python包名为keras_tuner)进行自动超参数搜索,以找到最佳超参数组合来提高模型的准确率。您可以定义搜索空间,并执行自动搜索,直到找到最优的超参数组合。下面是一个简单的超参数搜索示例:

    # `kt` and `tf` are both referenced below and therefore must be imported.
    import keras_tuner as kt
    import tensorflow as tf
    from keras_tuner import HyperModel
    
    # NOTE: `num_classes`, `train_generator` and `validation_generator` are
    # placeholders that must be defined before running this example.
    
    class MyHyperModel(HyperModel):
        """Search space: a frozen ResNet50 backbone plus a tunable dense head."""
    
        def __init__(self, num_classes):
            # Let the HyperModel base class initialise its own state.
            super().__init__()
            self.num_classes = num_classes
            self.base_model = tf.keras.applications.ResNet50(
                include_top=False, weights='imagenet')
    
        def build(self, hp):
            """Build and compile one candidate model from the values in ``hp``."""
            # Hyperparameters for fine-tuning. The learning rate spans two
            # orders of magnitude, so it is sampled on a log scale.
            learning_rate = hp.Float('learning_rate', min_value=1e-6, max_value=1e-4,
                                     sampling='log')
            dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5)
    
            # Keep the pretrained backbone frozen; only the head is trained.
            for layer in self.base_model.layers:
                layer.trainable = False
    
            x = self.base_model.output
            x = tf.keras.layers.GlobalAveragePooling2D()(x)
            x = tf.keras.layers.Dense(256, activation=hp.Choice('dense_activation', values=['relu', 'tanh', 'sigmoid']))(x)
            x = tf.keras.layers.Dropout(dropout_rate)(x)
            x = tf.keras.layers.Dense(self.num_classes, activation='softmax')(x)
    
            model = tf.keras.models.Model(inputs=self.base_model.input, outputs=x)
    
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])
    
            return model
    
    # Random search over 10 trials, ranked by validation accuracy.
    hp = kt.HyperParameters()
    tuner = kt.RandomSearch(
        MyHyperModel(num_classes),
        objective='val_accuracy',
        seed=42,
        max_trials=10,
        hyperparameters=hp)
    
    tuner.search(train_generator,
                 epochs=10,
                 validation_data=validation_generator)
    
    best_model = tuner.get_best_models(num_models=1)[0]
    

如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^