关于图神经网络的问题


from matplotlib import pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GatedGraphConv, global_add_pool

# Hyper-parameters shared by the data loaders and the training loop.
BATCH_SIZE = 128
EPOCHS = 50

# Read the pre-processed feature / label arrays from disk.
train_x = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/train_x.npy")
test_x = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/test_x.npy")
train_y = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/train_y.npy")
test_y = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/test_y.npy")

# Report the raw (NumPy) shape of the training features before conversion.
print(train_x.shape)

# Features become float32 tensors, labels become int64 tensors.
train_x = torch.tensor(train_x, dtype=torch.float)
train_y = torch.tensor(train_y, dtype=torch.long)
test_x = torch.tensor(test_x, dtype=torch.float)
test_y = torch.tensor(test_y, dtype=torch.long)

# Pair features with labels.
train_dataset = TensorDataset(train_x, train_y)
test_dataset = TensorDataset(test_x, test_y)

# Fixed-size, unshuffled mini-batches; a trailing partial batch is dropped.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, drop_last=True)

class GNNClassifier(nn.Module):
    """Graph classifier: GCNConv -> dropout -> GatedGraphConv -> sum pooling -> linear.

    Args:
        input_size: Number of input features per node (passed to ``GCNConv``).
        hidden_size: Width of the hidden node representation.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=128, hidden_size=32, num_classes=7):
        super().__init__()
        self.conv1 = GCNConv(input_size, hidden_size)
        # NOTE(review): per the PyG docs the second positional argument of
        # GatedGraphConv is ``num_layers``, so this requests ``hidden_size``
        # propagation steps -- confirm that is intended.
        self.conv2 = GatedGraphConv(hidden_size, hidden_size)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x, edge_index, batch):
        """Return class logits for the graphs described by the inputs.

        Args:
            x: Node features handed to the first convolution.
            edge_index: Edge index handed to both convolutions.
            batch: Vector handed to ``global_add_pool`` to group nodes.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, training=self.training)
        hidden = F.relu(self.conv2(hidden, edge_index))
        pooled = global_add_pool(hidden, batch)
        return self.fc(pooled)

# Set up the model, the loss function and the optimizer.
model = GNNClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model; per-epoch metrics are collected in these lists.
train_losses = []
test_losses = []
test_accuracies = []
# One hard-coded edge list, (0, 1), (1, 2), (2, 3), reused for every forward pass.
edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]], dtype=torch.long)
# NOTE: this is the number of training samples, not the size of a mini-batch.
num_nodes = len(train_x)
batch_size = 128
num_batches = num_nodes // batch_size  # not referenced again in this script
# Build batch[i] = i // batch_size, i.e. consecutive runs of 128 sample indices
# share one id. The vector has one entry per training sample.
batch = torch.tensor([i for i in range(num_nodes)], dtype=torch.long)
#batch = (batch // batch_size).tolist()
batch = (torch.div(batch, batch_size, rounding_mode='floor')).tolist()

batch = torch.tensor(batch, dtype=torch.long)

for epoch in range(EPOCHS):
    # Training
    model.train()
    train_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        data = data.transpose(1, 2)  # swap dims 1 and 2, intended to match the GNN input layout
        # NOTE(review): the quoted traceback reports the RuntimeError on this call.
        # The same edge_index and the full-dataset batch vector are passed for
        # every mini-batch.
        output = model(data, edge_index, batch)  # edge_index and batch carry the graph's edge and node information
        #output = model(data, edge_index, batch)  # edge_index should be specific to each instance

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # Mean of the per-batch losses.
    train_loss /= len(train_loader)
    train_losses.append(train_loss)

    # Testing
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.transpose(1, 2)  # swap dims 1 and 2, intended to match the GNN input layout
            output = model(data, edge_index, batch)
            loss = criterion(output, target)
            test_loss += loss.item()
            # Predicted class = index of the largest logit along dim 1.
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    test_loss /= len(test_loader)
    test_losses.append(test_loss)
    # Accuracy as a percentage of the evaluated test samples.
    test_accuracy = 100 * correct / total
    test_accuracies.append(test_accuracy)
    print(f'Epoch {epoch + 1}/{EPOCHS}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%')

# Plot test accuracy against epoch.
plt.figure(figsize=(10, 5))
plt.plot(test_accuracies, label='Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

Traceback (most recent call last):
  File "C:\Users\97233\Desktop\code\GN.py", line 70, in <module>
    output = model(data, edge_index, batch)  # edge_index和batch表示图的边和节点信息
  File "D:\anaconda\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\97233\Desktop\code\GN.py", line 37, in forward
    x = F.relu(self.conv1(x, edge_index))
  File "D:\anaconda\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "D:\anaconda\lib\site-packages\torch_geometric\nn\conv\gcn_conv.py", line 229, in forward
    x = self.lin(x)
  File "D:\anaconda\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "D:\anaconda\lib\site-packages\torch_geometric\nn\dense\linear.py", line 132, in forward
    return F.linear(x, self.weight, self.bias)
RuntimeError: Expected size for first two dimensions of batch2 tensor to be: [128, 9] but got: [128, 128].

出现这个报错信息是为什么，要怎么改

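引用chatGPT作答，这个错误并不是在处理 edge_index 时抛出的：从调用栈可以看到，它发生在 GCNConv 内部的线性变换 self.lin(x)（即 F.linear）处，说明是输入 x 的形状与模型不匹配。代码把 DataLoader 取出的整批张量（带 batch 维）转置后直接传给了模型，而 GCNConv 通常期望节点特征矩阵的形状为 [num_nodes, num_features]，并且最后一维要等于 input_size（这里是 128）。另外，固定的 edge_index（大小为 [2, 3]）以及按整个训练集长度构造的 batch 向量，也无法与每个 mini-batch 中的节点一一对应。因此，在调用 model(data, edge_index, batch) 时输入维度不兼容，就会导致这个错误。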

为了解决这个问题，你需要根据数据的特定形状构建一个正确的 edge_index 张量，使其与 x 具有兼容的维度。在这里，edge_index 表示图中的边信息，因此它应该是一个大小为 [2, num_edges] 的长整型张量，其中 num_edges 是图中的边数。为了创建正确的 edge_index 张量，你需要先计算 num_edges，然后使用这个值来创建一个大小为 [2, num_edges] 的张量，并为其填充正确的边信息。

要注意的是，在这个实现中，你需要手动指定边的信息，即哪些节点之间有边。这意味着你需要先根据数据构建图，然后根据图得到 edge_index。要构建图，可以使用 torch_geometric.utils 中提供的方法，例如 from_networkx 或 from_scipy_sparse_matrix，这些方法可以将常见的图表示转换为 PyTorch Geometric 中的表示。如果手头是稠密邻接矩阵，可以使用 dense_to_sparse 得到 edge_index；如果是 SciPy 稀疏矩阵，则使用 from_scipy_sparse_matrix。


from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader as GeoDataLoader
from torch_geometric.nn import GCNConv, GatedGraphConv, global_add_pool

train_x = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/train_x.npy")
test_x = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/test_x.npy")
train_y = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/train_y.npy")
test_y = np.load("C:/Users/97233/Desktop/Baseline-with-HAR-datasets-main/Pre-processing/test_y.npy")


def _build_graph_list(features, labels):
    """Turn per-sample arrays into a list of PyG ``Data`` graphs.

    Args:
        features: Array indexed by sample; each ``features[i]`` is a 2-D array
            that is transposed to form the node-feature matrix.
        labels: Array indexed by sample; ``labels[i]`` is the class label of
            sample ``i``.

    Returns:
        A list with one ``Data`` object per sample, in input order.
    """
    # Every sample gets the same 5-node chain 0 -> 1 -> 2 -> 3 -> 4 as a
    # placeholder topology. This is NOT a complete graph; replace it with the
    # real connectivity once it is known.
    # NOTE(review): requires each transposed sample to have at least 5 rows
    # (nodes) -- confirm against the data.
    chain_edges = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 4]], dtype=torch.long)

    graphs = []
    for i, sample in enumerate(features):
        # Transpose so the feature matrix is (num_nodes, input_size), and
        # convert to a float32 tensor: Data.x must be a tensor whose dtype
        # matches the model weights, not a raw NumPy array.
        node_features = torch.tensor(sample.T, dtype=torch.float)
        label = torch.tensor([labels[i]], dtype=torch.long)
        # Clone so that no two graphs share the same edge tensor object.
        graphs.append(Data(x=node_features, edge_index=chain_edges.clone(), y=label))
    return graphs


# Convert the input arrays into graph data.
train_data_list = _build_graph_list(train_x, train_y)
test_data_list = _build_graph_list(test_x, test_y)

train_loader = GeoDataLoader(train_data_list, batch_size=128, shuffle=True)
test_loader = GeoDataLoader(test_data_list, batch_size=128, shuffle=False)

class GNNClassifier(nn.Module):
    """Graph classifier: GCNConv -> dropout -> GatedGraphConv -> sum pooling -> linear.

    Args:
        input_size: Number of input features per node (passed to ``GCNConv``).
        hidden_size: Width of the hidden node representation.
        num_classes: Number of output logits per graph.
    """

    def __init__(self, input_size=128, hidden_size=32, num_classes=7):
        super(GNNClassifier, self).__init__()

        self.conv1 = GCNConv(input_size, hidden_size)
        # NOTE(review): per the PyG docs the second positional argument of
        # GatedGraphConv is ``num_layers``, so this requests ``hidden_size``
        # propagation steps -- confirm that is intended.
        self.conv2 = GatedGraphConv(hidden_size, hidden_size)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, data):
        """Return one row of class logits per graph in ``data``.

        Args:
            data: A (batched) PyG data object providing ``x``, ``edge_index``
                and ``batch``.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch
        # edge_index is created as [2, num_edges], the layout the conv layers
        # take, so it is passed through unchanged. Transposing it to
        # [num_edges, 2] (as ``.t().contiguous()`` would) is only appropriate
        # when the edges were originally written as a list of (src, dst) pairs.
        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        # Sum node embeddings per graph to get one vector per graph.
        x = global_add_pool(x, batch)
        x = self.fc(x)
        return x
 
# Set up the model, the loss function and the optimizer.
model = GNNClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Per-epoch metric histories.
train_losses, test_losses, test_accuracies = [], [], []

for _epoch in range(50):
    # ---- training pass ----
    model.train()
    running_train_loss = 0
    for graph_batch in train_loader:
        optimizer.zero_grad()
        logits = model(graph_batch)
        batch_loss = criterion(logits, graph_batch.y)
        batch_loss.backward()
        optimizer.step()
        # Weight the mean batch loss by the number of graphs in the batch so
        # that the epoch figure is a true per-graph average.
        running_train_loss += batch_loss.item() * graph_batch.num_graphs
    train_losses.append(running_train_loss / len(train_data_list))

    # ---- evaluation pass ----
    model.eval()
    running_test_loss = 0
    num_correct = 0
    num_seen = 0
    with torch.no_grad():
        for graph_batch in test_loader:
            logits = model(graph_batch)
            batch_loss = criterion(logits, graph_batch.y)
            running_test_loss += batch_loss.item() * graph_batch.num_graphs
            # Predicted class = index of the largest logit along dim 1.
            predicted = torch.max(logits.data, 1)[1]
            num_seen += graph_batch.num_graphs
            num_correct += (predicted == graph_batch.y).sum().item()
    test_losses.append(running_test_loss / len(test_data_list))
    # Accuracy as a percentage of the evaluated test graphs.
    test_accuracies.append(100 * num_correct / num_seen)

# Plot the learning curves: losses on the left, test accuracy on the right.
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
loss_ax = ax[0]
acc_ax = ax[1]

loss_ax.plot(train_losses, label="Train")
loss_ax.plot(test_losses, label="Test")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.legend()

acc_ax.plot(test_accuracies)
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")

plt.show()

你可以看下这个问题的回答https://ask.csdn.net/questions/337355
这篇博客你也可以参考下：卷积神经网络的卷积核是怎么卷积的，特别适合刚入门的理解
除此之外, 这篇博客: 关于通过图注意神经网络处理多元时间序列的错误预测论文的讨论中的 总体思路 部分也许能够解决你的问题, 你可以仔细阅读以下内容或跳转源博客中阅读:
1. 用户提供装在该工业设备上传感器的数据
2. 数据是传感器以时间序列为一个维度采集到的信息
3. 用户要求解决工业设备异常的情况（基于传感器提供的数据）
4. 建立两个图注意力的网络层
5. 面向特征的图注意力层：捕捉多个特征之间的因果关系
6. 面向时间的图注意力层：强调时间维度上的数据依赖关系
7. 还联合训练了一个基于预测的模型和基于重建的模型
8. 解决异常的问题
第 4、5、6、7 点都是为了解决第 3 点而提出的技术方案，这些数据无需标注，都是用自监督（模型直接从无标签数据中自行学习，无需标注数据）的方式解决问题。
您还可以看一下七月在线老师的图神经网络课程中的图网络概览与环境配置小节, 巩固相关知识点