I've been improving a residual network by adding an attention mechanism to it, but after a lot of fiddling the matrix dimensions still don't match, so I'm asking here for help.
The code is as follows:
import os
import numpy as np
import torch.nn as nn
import torchvision
from PIL import Image
import torch
import torch.nn.functional as F
from matplotlib import pyplot as plt
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
from center_loss import CenterLoss
transformtrain = transforms.Compose([
    transforms.Resize((256, 256)),  # increase image size for better attention mechanism
    # data augmentation
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=(-30, 30)),
    transforms.ToTensor(),
])
class MyData_train(Dataset):
    def __init__(self, img_path, transform=None):
        super(MyData_train, self).__init__()
        self.root = img_path
        self.txt_root = 'train.txt'
        f = open(self.txt_root, 'r')
        data = f.readlines()
        imgs = []
        labels = []
        for line in data:
            line = line.rstrip()
            word = line.split()
            imgs.append(os.path.join(self.root, word[0], word[1]))
            labels.append(word[2])
        self.img = imgs
        self.label = labels
        self.transform = transform

    def __len__(self):
        return len(self.label)

    def __getitem__(self, item):
        img = self.img[item]
        label = self.label[item]
        img = Image.open(img).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        label = np.array(label).astype(np.int64)
        label = torch.from_numpy(label)
        return img, label
path = 'Train'
dataset = MyData_train(path, transformtrain)
train_loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True, drop_last=True)
# define attention module
class Attention(nn.Module):
    def __init__(self, in_channels=512, out_channels=512, kernel_size=3):
        super(Attention, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size, padding=(kernel_size - 1) // 2)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(in_channels, in_channels, kernel_size, padding=(kernel_size - 1) // 2)
        self.bn2 = nn.BatchNorm2d(in_channels)
        self.conv3 = nn.Conv2d(in_channels, in_channels, kernel_size, padding=(kernel_size - 1) // 2)
        self.bn3 = nn.BatchNorm2d(in_channels)
        self.conv4 = nn.Conv2d(in_channels, in_channels, kernel_size, padding=(kernel_size - 1) // 2)
        self.bn4 = nn.BatchNorm2d(in_channels)
        self.fc = nn.Linear(in_channels, out_channels)

    def forward(self, x):
        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        h = F.relu(self.bn3(self.conv3(h)))
        h = F.relu(self.bn4(self.conv4(h)))
        h = h.view(h.size(0), h.size(1))
        h = self.fc(h)
        r = F.softmax(h, dim=1)
        r = r.view(r.size(0), r.size(1), 1, 1)
        a = (r * x).sum(dim=1)
        return a
transformtest = transforms.Compose([
    transforms.Resize((256, 256)),  # increase image size for better attention mechanism
    transforms.ToTensor()
])
class MyData_test(Dataset):
    def __init__(self, img_path, transform=None):
        super(MyData_test, self).__init__()
        self.root = img_path
        self.txt_root = 'test.txt'
        f = open(self.txt_root, 'r')
        data = f.readlines()
        imgs = []
        labels = []
        for line in data:
            line = line.rstrip()
            word = line.split()
            imgs.append(os.path.join(self.root, word[0], word[1]))
            labels.append(word[2])
        self.img = imgs
        self.label = labels
        self.transform = transform

    def __len__(self):
        return len(self.label)

    def __getitem__(self, item):
        img = self.img[item]
        label = self.label[item]
        img = Image.open(img).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        label = np.array(label).astype(np.int64)
        label = torch.from_numpy(label)
        return img, label
path = 'Test'
dataset_test = MyData_test(path, transformtest)
test_loader = DataLoader(dataset=dataset_test, batch_size=1, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torchvision.models.resnet50(pretrained=True)
# define custom classifier head with attention mechanism
class my_Module(nn.Module):
    def __init__(self):
        super(my_Module, self).__init__()
        self.backbone = model
        self.fc1 = nn.Linear(2048, 512)
        self.attention = Attention(in_channels=512)
        self.fc2 = nn.Linear(512, 23, bias=True)

    def forward(self, x):
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)
        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)
        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        # print("x shape before attention:", x.shape)
        # apply attention module to x instead of whole model output
        x = x.view(x.size(0), x.size(1), 1, 1)
        x = self.attention(x)
        x = self.fc2(x)
        return x
model1 = my_Module()
print(model1)
model1.to(device)
criterion_xent = torch.nn.CrossEntropyLoss()
criterion_cent = CenterLoss(num_classes=23, feat_dim=2048, use_gpu=True)
optimizer_model = torch.optim.SGD(model.parameters(), lr=0.001)
optimizer_centloss = torch.optim.SGD(criterion_cent.parameters(), lr=0.0001)
def train(epoch):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        input, labels = data
        input, labels = input.to(device), labels.to(device)
        features, outputs = model1(input)
        loss_xent = criterion_xent(outputs, labels)
        loss_cent = criterion_cent(features, labels)
        loss_cent *= 1
        loss = loss_xent + 0.01 * loss_cent
        optimizer_model.zero_grad()
        optimizer_centloss.zero_grad()
        loss.backward()
        optimizer_model.step()
        # undo the 0.01 weighting on the center-loss gradients so the class centers
        # are updated with their full gradient
        for param in criterion_cent.parameters():
            param.grad.data *= (1. / 0.01)
        optimizer_centloss.step()
        running_loss += loss.data
        if (i % 20 == 19):
            print("epoch:{0} {1} loss:{2}".format(epoch + 1, i + 1, running_loss / 20))
            running_loss = 0.0
def test(epoch):
    correct = 0
    total = 0
    with torch.no_grad():
        for _, data in enumerate(test_loader, 0):
            input, labels = data
            input, labels = input.to(device), labels.to(device)
            _, y_pred = model1(input)
            _, max_pos = torch.max(y_pred.data, dim=1)
            total += labels.size(0)
            correct += (max_pos == labels).sum().item()
    print('Accuracy on test set:{}%'.format(100 * correct / total))
    yy[epoch] = 100 * correct / total
sum_epoch = 250
yy = np.zeros(sum_epoch, )
for epoch in range(sum_epoch):
    train(epoch)
    model1.eval()
    test(epoch)
    print()
torch.save(model1, 'resnet50_250_0001_centerloss_attention.pkl')
np.save('center_loss_001_attention.npy', yy)
The program output is as follows:
Traceback (most recent call last):
File "D:\guaduatepaper\Remote_sensing_image_classification\pytorch_classification\Test5_resnet\improvedexperiment\addCenterloss\atmain.py", line 221, in <module>
train(epoch)
File "D:\guaduatepaper\Remote_sensing_image_classification\pytorch_classification\Test5_resnet\improvedexperiment\addCenterloss\atmain.py", line 185, in train
features, outputs = model1(input)
File "C:\Users\60234\anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "D:\guaduatepaper\Remote_sensing_image_classification\pytorch_classification\Test5_resnet\improvedexperiment\addCenterloss\atmain.py", line 168, in forward
x = self.fc2(x)
File "C:\Users\60234\anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\60234\anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x1 and 512x23)
Could someone please help me fix the code so the model runs properly?
The following answer was written jointly by the GPT-3.5 model and the blogger 波罗歌:
From the code and the error message, the matrix multiplication fails at self.fc2(x): the tensor x that reaches this layer no longer has the shape the layer expects. self.fc2 is nn.Linear(512, 23), so it needs an input of shape (batch_size, 512), but the error shows that x arrives as a 16x1 matrix (a single value per sample), which cannot be multiplied with the 512x23 weight matrix.
Given that, the output dimension of the Attention module needs to be changed so that it matches the number of output channels produced by self.fc1. Concretely, modify the Attention module as follows:
class Attention(nn.Module):
    def __init__(self, in_channels=512, out_channels=512, kernel_size=3):
        super(Attention, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size, padding=(kernel_size - 1) // 2)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(in_channels, in_channels, kernel_size, padding=(kernel_size - 1) // 2)
        self.bn2 = nn.BatchNorm2d(in_channels)
        self.conv3 = nn.Conv2d(in_channels, in_channels, kernel_size, padding=(kernel_size - 1) // 2)
        self.bn3 = nn.BatchNorm2d(in_channels)
        self.conv4 = nn.Conv2d(in_channels, out_channels, kernel_size, padding=(kernel_size - 1) // 2)  # modified here
        self.bn4 = nn.BatchNorm2d(out_channels)
        self.fc = nn.Linear(out_channels, out_channels)

    def forward(self, x):
        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        h = F.relu(self.bn3(self.conv3(h)))
        h = F.relu(self.bn4(self.conv4(h)))  # modified line
        h = h.view(h.size(0), h.size(1))
        h = self.fc(h)
        r = F.softmax(h, dim=1)
        r = r.view(r.size(0), r.size(1), 1, 1)
        a = (r * x).sum(dim=1)
        return a
This way the tensor passed into self.fc2 should match the dimension that layer expects.
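One more observation, offered as a sketch rather than a definitive fix: because x is reshaped to a 1x1 spatial map before entering the Attention module, the line a = (r * x).sum(dim=1) collapses all 512 channels into a single value per sample, and that is exactly where the 16x1 tensor in the error message comes from. Assuming channel-wise re-weighting is what is intended, one way to keep a 512-dimensional feature for self.fc2 is to replace Attention.forward so the weights are applied without summing over the channel dimension:

    def forward(self, x):
        # x arrives as (batch, 512, 1, 1) after the reshape in my_Module.forward
        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        h = F.relu(self.bn3(self.conv3(h)))
        h = F.relu(self.bn4(self.conv4(h)))
        h = h.view(h.size(0), h.size(1))   # (batch, 512) because the spatial size is 1x1
        h = self.fc(h)
        r = F.softmax(h, dim=1)            # one attention weight per channel
        r = r.view(r.size(0), r.size(1), 1, 1)
        a = (r * x).view(x.size(0), -1)    # re-weight the channels but keep all 512 of them
        return a

With this variant the attention output stays (batch_size, 512), so self.fc2 = nn.Linear(512, 23) applies directly. Note that the training loop would still need a separate change, because my_Module.forward returns a single tensor while train() unpacks features, outputs = model1(input).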
If my answer solved your problem, please accept it!
My own understanding, for your reference:
First, check the following three dimensions (a quick shape-printing sketch follows this list):
1. The dimension of the input: make sure the input is created with the dimension you expect (typically N*d).
2. The dimension of the weights: make sure the attention mechanism is created with matching weight dimensions.
3. The dimension of the attention mechanism itself: make sure its input and output dimensions are consistent with the layers around it.
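A quick way to do these checks is to print tensor shapes around the layers involved. A minimal sketch, assuming the model1 object and the 256x256 input size from the question (the print_shape helper is only for illustration):

# Shape-debugging sketch: forward hooks print the input/output shapes of fc1,
# the attention block and fc2, which shows where the feature dimension collapses.
def print_shape(name):
    def hook(module, inputs, output):
        print(name, "in:", tuple(inputs[0].shape), "out:", tuple(output.shape))
    return hook

handles = [
    model1.fc1.register_forward_hook(print_shape("fc1")),
    model1.attention.register_forward_hook(print_shape("attention")),
    model1.fc2.register_forward_hook(print_shape("fc2")),
]
with torch.no_grad():
    try:
        model1(torch.randn(2, 3, 256, 256).to(device))  # dummy batch matching Resize((256, 256))
    except RuntimeError as e:
        print("forward failed:", e)  # the shapes printed above show where it went wrong
for h in handles:
    h.remove()  # remove the hooks once done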
Second, go through the following possibilities:
1. Unsuitable dataset: make sure the dataset contains enough input and output samples; if it does not, increase the amount of data through random over-sampling or under-sampling.
2. Unsuitable network structure: the network may not match the task, for example it lacks residual connections and needs to be redesigned to include more of them.
3. Incorrect model: the model may be using incorrect weights during training, or the loss function may not be defined correctly, in which case the model needs to be adjusted.
4. Incorrect hyperparameters: for example the learning rate, batch size, or regularization coefficient. If these are set incorrectly, the model may fail to converge or to reach its best performance.
Quoting ChatGPT: in the my_Module class, the input dimension of self.fc1 needs to be changed. The ResNet50 in the code outputs 2048-dimensional features, and the feature dimension after the attention mechanism is 512, so in the my_Module class the input dimension of self.fc1 should be set to 2048 + 512 = 2560.
Specifically, change the line
self.fc1 = nn.Linear(2048, 6)
to
self.fc1 = nn.Linear(2560, 6)
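For reference, a minimal sketch of the layout this last suggestion appears to assume, namely concatenating the 2048-dimensional backbone feature with a 512-dimensional attention feature before a 2560-input linear layer (this is not what the question's forward currently does, and the tensor names below are purely illustrative):

import torch
import torch.nn as nn

backbone_feat = torch.randn(16, 2048)   # e.g. globally pooled ResNet50 features
attention_feat = torch.randn(16, 512)   # e.g. the output of an attention branch
combined = torch.cat([backbone_feat, attention_feat], dim=1)  # shape (16, 2560)
fc = nn.Linear(2560, 23)                # 2048 + 512 = 2560 inputs, 23 classes as in the question
print(fc(combined).shape)               # torch.Size([16, 23])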