import os
import os.path as osp
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
# Make numpy / pandas print full arrays and frames for inspection.
np.set_printoptions(threshold=1000000)
pd.set_option('display.max_columns', 1000000)
# Show all rows.
pd.set_option('display.max_rows', 1000000)
# Max width of a displayed column value (default is 50).
# FIX: 'max_colwidth' without the 'display.' prefix is rejected by modern
# pandas; the fully-qualified option key works on all versions.
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.width', 10000)
## Load the Cora citation dataset with row-normalized node features.
folder = "node_classify/cora"
data_name = "cora"
dataset = Planetoid(root=folder, name=data_name, transform=T.NormalizeFeatures())
data = dataset[0]
print("data", data)

### Sanity checks: data.x has already been normalized by the transform.
x = data.x
print(x[0])
print(torch.nonzero(x[0]))
print(torch.nonzero(x[1]))
print(torch.nonzero(x[0]).shape)
print(torch.nonzero(x[1]).shape)
print(x[0, 19])
print(x[1, 19])
print(x[0, 81])
print(x[1, 88])

# FIX: the original printed the whole `data` object under a
# "dataset.num_features" label and duplicated the correct line.
print("dataset.num_features:", dataset.num_features)
print("dataset.num_classes:", dataset.num_classes)
print("data.x.shape:", data.x.shape)
## edge_index lists the node pairs joined by an edge, shape [2, 10556].
print("data.edge_index.shape:", data.edge_index.shape)

x, edge_index = data.x, data.edge_index
print("edge_index.shape:", edge_index.shape)
## Edge feature matrix with shape [num_edges, num_edge_features] (None for Cora).
print("data.edge_attr:", data.edge_attr)
print("data.y:", data.y)
print("data.y[data.train_mask]", data.y[data.train_mask])
## Directed or undirected graph?
print("data.is_undirected:", data.is_undirected())
## train_mask marks the nodes to train on (140 nodes);
## val_mask marks the validation nodes, e.g. for early stopping (500 nodes);
## test_mask marks the nodes to test against (1000 nodes).
print(data.train_mask.sum().item())
print(data.val_mask.sum().item())
print(data.test_mask.sum().item())
print("data.train_mask:", data.train_mask)
class GCNConv(MessagePassing):
    """Graph convolutional layer (Kipf & Welling, 2017).

    Computes D^{-1/2} (A + I) D^{-1/2} X W via sum-aggregated message
    passing, where A is the adjacency matrix and W a learned linear map.
    """

    def __init__(self, in_channels, out_channels):
        # BUG FIX: the original defined `init` instead of `__init__`, so the
        # layer was never initialized and `self.lin` did not exist.
        super(GCNConv, self).__init__(aggr='add')  # "Add" aggregation.
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        # x has shape [N, in_channels]; edge_index has shape [2, E].
        # Step 1: add self-loops so each node keeps its own features.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
        # Step 2: linearly transform node features -> [N, out_channels].
        x = self.lin(x)
        # Step 3: per-edge normalization 1/sqrt(deg_i * deg_j), computed
        # here (not in message) because current PyG versions no longer
        # inject `edge_index`/`size` into `message` automatically — relying
        # on that raises "missing argument" errors on newer releases.
        row, col = edge_index
        deg = degree(row, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0  # isolated nodes
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        # Steps 4-5: propagate messages (calls message/aggregate/update).
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        # x_j has shape [E, out_channels]: features of each edge's source.
        # Scale each incoming message by its symmetric normalization weight.
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        # aggr_out has shape [N, out_channels]: the new node embeddings.
        return aggr_out
class Net(torch.nn.Module):
    """Two-layer GCN for Cora node classification (hidden size 16)."""

    def __init__(self):
        # BUG FIX: the original defined `init` instead of `__init__`, so the
        # conv submodules were never registered with the Module.
        super(Net, self).__init__()  # run nn.Module's own initialization first
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self):
        # NOTE: reads the module-level `data` object instead of taking
        # inputs as arguments — kept as-is because train()/test() call
        # `model()` with no arguments.
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)  # active only in train mode
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)
############################## Device setup and optimizer #############################
device = torch.device('cpu')
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
############################## Training function #############################
def train():
    """Run one optimization step over the training nodes.

    Uses the module-level ``model``, ``optimizer`` and ``data``; the loss is
    negative log-likelihood over the 140 train-mask nodes only.
    """
    model.train()
    # Zero the gradients before backpropagation.
    optimizer.zero_grad()
    # Backpropagate the training-set loss.
    loss = F.nll_loss(model()[data.train_mask], data.y[data.train_mask])
    loss.backward()
    # Update the parameters.
    optimizer.step()
############################## Evaluation function #############################
def test():
    """Return accuracies on the train/val/test masks, in that order."""
    model.eval()
    logits, accs = model(), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        # Predicted class = index of the highest log-probability.
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs
############################## Train and evaluate #############################
best_val_acc = test_acc = 0
for epoch in range(1, 201):
    train()
    train_acc, val_acc, tmp_test_acc = test()
    # Model selection: report the test accuracy of the best-validation epoch.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
    print(log.format(epoch, train_acc, best_val_acc, test_acc))
# Print which parameters participated in training (once, after the loop —
# the original's lost indentation suggests this ran every epoch, which
# would just repeat the same names 200 times).
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)
# NOTE(review): the following lines are reader replies pasted from the blog's
# comment thread (about a "missing argument named 'key'" error in message());
# they are not code, so they are preserved here as comments to keep the file
# runnable. Translations:
# "Did you copy this code from somewhere else? Check whether it's a problem
#  with your dataset. The failing line complains about a missing 'key' argument."
# "Was the problem solved?"
# "I have this problem too."
# "How did you solve this problem?"