from matplotlib import pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GatedGraphConv, global_add_pool
train_x = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/train_x.npy")
test_x = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/test_x.npy")
train_y = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/train_y.npy")
test_y = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/test_y.npy")
print(train_x.shape)
train_x = torch.tensor(train_x).float()
train_y = torch.tensor(train_y).long()
test_x = torch.tensor(test_x).float()
test_y = torch.tensor(test_y).long()
train_dataset = TensorDataset(train_x, train_y)
test_dataset = TensorDataset(test_x, test_y)
BATCH_SIZE = 128
EPOCHS = 50
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, drop_last=True)
class GNNClassifier(nn.Module):
def __init__(self, input_size=128, hidden_size=32, num_classes=7):
super(GNNClassifier, self).__init__()
self.conv1 = GCNConv(input_size, hidden_size)
self.conv2 = GatedGraphConv(hidden_size, hidden_size)
self.fc = nn.Linear(hidden_size, num_classes)
def forward(self, x, edge_index, batch):
x = F.relu(self.conv1(x, edge_index))
x = F.dropout(x, training=self.training)
x = F.relu(self.conv2(x, edge_index))
x = global_add_pool(x, batch)
x = self.fc(x)
return x
# 配置模型和优化器
model = GNNClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# 训练模型
train_losses = []
test_losses = []
test_accuracies = []
edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]], dtype=torch.long)
num_nodes = len(train_x)
batch_size = 128
num_batches = num_nodes // batch_size
batch = torch.tensor([i for i in range(num_nodes)], dtype=torch.long)
#batch = (batch // batch_size).tolist()
batch = (torch.div(batch, batch_size, rounding_mode='floor')).tolist()
batch = torch.tensor(batch, dtype=torch.long)
for epoch in range(EPOCHS):
# Training
model.train()
train_loss = 0
for batch_idx, (data, target) in enumerate(train_loader):
optimizer.zero_grad()
data = data.transpose(1, 2) # 调整数据维度,使之符合GNN的输入格式
output = model(data, edge_index, batch) # edge_index和batch表示图的边和节点信息
#output = model(data, edge_index, batch) # edge_index should be specific to each instance
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()
train_loss /= len(train_loader)
train_losses.append(train_loss)
# Testing
model.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
for data, target in test_loader:
data = data.transpose(1, 2) # 调整数据维度,使之符合GNN的输入格式
output = model(data, edge_index, batch)
loss = criterion(output, target)
test_loss += loss.item()
_, predicted = torch.max(output.data, 1)
total += target.size(0)
correct += (predicted == target).sum().item()
test_loss /= len(test_loader)
test_losses.append(test_loss)
test_accuracy = 100 * correct / total
test_accuracies.append(test_accuracy)
print(f'Epoch {epoch + 1}/{EPOCHS}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%')
plt.figure(figsize=(10, 5))
plt.plot(test_accuracies, label='Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
Traceback (most recent call last):
File "C:\Users\97233\Desktop\code\GN.py", line 70, in <module>
output = model(data, edge_index, batch) # edge_index和batch表示图的边和节点信息
File "D:\anaconda\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\97233\Desktop\code\GN.py", line 37, in forward
x = F.relu(self.conv1(x, edge_index))
File "D:\anaconda\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "D:\anaconda\lib\site-packages\torch_geometric\nn\conv\gcn_conv.py", line 229, in forward
x = self.lin(x)
File "D:\anaconda\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "D:\anaconda\lib\site-packages\torch_geometric\nn\dense\linear.py", line 132, in forward
return F.linear(x, self.weight, self.bias)
RuntimeError: Expected size for first two dimensions of batch2 tensor to be: [128, 9] but got: [128, 128].
出现这个报错信息是为什么,要怎么改
引用chatGPT作答,这个错误信息是由于 edge_index 的维度与输入数据 x 的维度不兼容造成的。根据报错信息可以看到,edge_index 的大小是 [2, 3],而 x 的大小是 [batch_size, num_channels, num_nodes]。因此,在调用 model(data, edge_index, batch) 时,edge_index 的维度不匹配,会导致这个错误。
为了解决这个问题,你需要根据数据的特定形状构建一个正确的 edge_index 张量,使其与 x 具有兼容的维度。在这里,edge_index 表示图中的边信息,因此它应该是一个大小为 [2, num_edges] 的长整型张量,其中 num_edges 是图中的边数。为了创建正确的 edge_index 张量,你需要先计算 num_edges,然后使用这个值来创建一个大小为 [2, num_edges] 的张量,并为其填充正确的边信息。
要注意的是,在这个实现中,你需要手动指定边的信息,即哪些节点之间有边。这意味着你需要先根据数据构建图形,然后根据图形计算 edge_index。要构建图形,可以使用库中提供的方法,例如 from_networkx 或 from_scipy_sparse_matrix,这些方法可以将常见的图形表示转换为 PyTorch Geometric 中的表示。然后,你可以使用库中提供的方法来计算 edge_index,例如 from_dense_adjacency_matrix 或 from_scipy_sparse_matrix。
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader as GeoDataLoader
from torch_geometric.nn import GCNConv, GatedGraphConv, global_add_pool
train_x = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/train_x.npy")
test_x = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/test_x.npy")
train_y = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/train_y.npy")
test_y = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/test_y.npy")
# 将输入数据转化为图数据
train_data_list = []
for i in range(train_x.shape[0]):
x = train_x[i].T # 转置,使得特征矩阵的 shape 变成 (num_nodes, input_size)
edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 4]])
# 这里先默认每张图都是一个完全图,即任意两个节点之间都有边连接
label = torch.tensor([train_y[i]], dtype=torch.long)
train_data_list.append(Data(x=x, edge_index=edge_index, y=label))
test_data_list = []
for i in range(test_x.shape[0]):
x = test_x[i].T
edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 4]])
label = torch.tensor([test_y[i]], dtype=torch.long)
test_data_list.append(Data(x=x, edge_index=edge_index, y=label))
train_loader = GeoDataLoader(train_data_list, batch_size=128, shuffle=True)
test_loader = GeoDataLoader(test_data_list, batch_size=128, shuffle=False)
class GNNClassifier(nn.Module):
def __init__(self, input_size=128, hidden_size=32, num_classes=7):
super(GNNClassifier, self).__init__()
self.conv1 = GCNConv(input_size, hidden_size)
self.conv2 = GatedGraphConv(hidden_size, hidden_size)
self.fc = nn.Linear(hidden_size, num_classes)
def forward(self, data):
x, edge_index, batch = data.x, data.edge_index, data.batch
x = F.relu(self.conv1(x, edge_index.t().contiguous())) # 注意这里需要转置边索引矩阵,并调用 contiguous() 确保内存连续
x = F.dropout(x, training=self.training)
x = F.relu(self.conv2(x, edge_index.t().contiguous()))
x = global_add_pool(x, batch)
x = self.fc(x)
return x
# 配置模型和优化器
model = GNNClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# 训练模型
train_losses = []
test_losses = []
test_accuracies = []
for epoch in range(50):
# Training
model.train()
train_loss = 0
for batch_idx, data in enumerate(train_loader):
optimizer.zero_grad()
output = model(data)
loss = criterion(output, data.y)
loss.backward()
optimizer.step()
train_loss += loss.item() * data.num_graphs # 这里需要乘上当前 batch 中包含的图数量
train_loss /= len(train_data_list)
train_losses.append(train_loss)
# Testing
model.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
for data in test_loader:
output = model(data)
loss = criterion(output, data.y)
test_loss += loss.item() * data.num_graphs
_, predicted = torch.max(output.data, 1)
total += data.num_graphs
correct += (predicted == data.y).sum().item()
test_loss /= len(test_data_list)
test_losses.append(test_loss)
test_accuracy = 100 * correct / total
test_accuracies.append(test_accuracy)
# 绘制学习曲线
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
ax[0].plot(train_losses, label="Train")
ax[0].plot(test_losses, label="Test")
ax[0].set_xlabel("Epoch")
ax[0].set_ylabel("Loss")
ax[0].legend()
ax[1].plot(test_accuracies)
ax[1].set_xlabel("Epoch")
ax[1].set_ylabel("Accuracy")
plt.show()
4567都是为了解决3而提出的技术问题,这些数据无需标注,都是用自监督(模型直接从无标签数据中自行学习,无需标注数据)的方式解决问题。