# 数据集类
class MyDataset(paddle.io.Dataset):
    """Sliding-window dataset built from a one-dimensional series.

    Step 1: subclass ``paddle.io.Dataset``.

    Each sample pairs ``num_features`` consecutive values of ``data`` with the
    ``num_labels`` values that immediately follow them.
    """

    def __init__(self, data, num_features=10, num_labels=1):
        """Step 2: build every window up front and record the dataset size.

        Args:
            data: One-dimensional sequence (e.g. a NumPy array) of values.
            num_features: Number of consecutive values in each input window.
            num_labels: Number of values right after the window used as label.

        Raises:
            ValueError: If ``data`` is too short to produce a single sample.
        """
        super().__init__()
        self.data = data
        self.num_features = num_features
        self.num_labels = num_labels

        window = num_features + num_labels
        num_samples = len(data) - window + 1
        if num_samples <= 0:
            # Fail early with a readable message instead of letting
            # np.vstack complain about an empty list of arrays.
            raise ValueError(
                f"data has {len(data)} values but at least {window} are "
                f"required (num_features={num_features}, "
                f"num_labels={num_labels})"
            )

        x = [data[i:i + num_features] for i in range(num_samples)]
        y = [data[i + num_features:i + window] for i in range(num_samples)]

        # Inputs get a trailing axis of size 1: (num_samples, num_features, 1).
        self.x = np.vstack(x).reshape(-1, num_features, 1).astype("float32")
        self.y = np.vstack(y).astype("float32")
        self.num_samples = num_samples

    def __getitem__(self, index):
        """Step 3: return the single sample stored at ``index``.

        Returns:
            A ``(data, label)`` tuple taken from the precomputed float32
            arrays; for an integer ``index`` and 1-D input, ``data`` has shape
            ``(num_features, 1)`` and ``label`` has shape ``(num_labels,)``.
        """
        return self.x[index], self.y[index]

    def __len__(self):
        """Step 4: return the total number of samples in the dataset."""
        return self.num_samples
# Build the train/test datasets with a 10-step input window, then print the
# shapes of the first training sample as a quick sanity check.
train_dataset = MyDataset(train_data, num_features=10)
test_dataset = MyDataset(test_data, num_features=10)
first_window, first_label = train_dataset[0]  # subscript calls __getitem__
print(first_window.shape)
print(first_label.shape)
item 就是一个索引:通过 DataLoader 取数据时,索引由采样器(sampler)自动传入 __getitem__()(例如 shuffle=True 时顺序是随机的),并不是 Python 底层随机分配的;在函数外面,我们也可以用 dataset[i] 的形式自己指定索引,
就像最下面的代码块 A、B 部分所示。
A代码
def __init__(self, dataset, idxs):
    """Store the dataset together with the indices used to address it.

    Args:
        dataset: Object to keep as ``self.dataset``.
        idxs: Iterable of index values; each one is converted with ``int``
            and the results are kept as the list ``self.idxs``.
    """
    self.dataset = dataset
    self.idxs = [int(i) for i in idxs]
def __len__(self):
    """Return the number of stored indices in ``self.idxs``."""
    return len(self.idxs)
# Look up the sample at ``self.idxs[item]`` in the wrapped dataset and return
# it as a pair of tensors; the statements in between are debugging output.
# NOTE(review): the indentation of this snippet was lost, so which statements
# belong to the ``for`` loop and to the ``if`` below cannot be determined from
# here — confirm against the original before running.
def __getitem__(self, item):
# Translate the incoming position into an index of the wrapped dataset.
image, label = self.dataset[self.idxs[item]]
# Debug output: the incoming position, all stored indices, the resolved
# index, and the size of the wrapped dataset.
print('\r\n')
print(item)
print(self.idxs)
print(self.idxs[item])
print(len(self.dataset))
# NOTE(review): ``k`` and ``v`` are not used by any statement shown here, and
# 35524 is a hard-coded index — presumably specific to the author's dataset;
# verify it is valid for the dataset in use.
for k,v in self.dataset:
print(self.dataset[35524])
print(self.dataset[self.idxs[item]])
# NOTE(review): the same prompt appears twice and ``a`` is simply reassigned,
# so the earlier answer is discarded.
a = input('请输入a=0 or a=1: \t')
a = input('请输入a=0 or a=1: \t')
# NOTE(review): in Python 3 ``input`` returns a str, so comparing it with the
# int 1 is never true — confirm whether ``a == '1'`` was intended.
if a ==1:
print('执行')
# Convert both parts of the sample to tensors before returning them.
return torch.tensor(image), torch.tensor(label)
B代码
class Student:
    """Wrapper around a mapping that prints a trace line on item operations.

    It demonstrates which special method Python calls for ``obj[key]``,
    ``obj[key] = value``, ``del obj[key]`` and ``len(obj)``.
    """

    def __init__(self, user_dic):
        """Keep ``user_dic`` (not a copy) as the backing store ``self.value``."""
        self.value = user_dic

    def __getitem__(self, item):
        """Print a trace line and return ``self.value[item]``."""
        print('__getitem__', item)
        return self.value[item]

    def __setitem__(self, key, value):
        """Print a trace line and store ``value`` under ``key``."""
        print('__setitem__', key, value)
        self.value[key] = value

    def __delitem__(self, key):
        """Print a trace line and delete ``key`` from the backing store."""
        print('__delitem__', key)
        del self.value[key]

    def __len__(self):
        """Return the number of entries in the backing store."""
        return len(self.value)
概括起来,这两段代码讲的就是:用 PyTorch 在自己的数据集上训练神经网络时,需要重写 Dataset 类
【自我理解,仅供参考】
另外,提供一个参考学习链接,希望对你有所帮助:https://blog.csdn.net/weixin_44911037/article/details/123202869
【里面以实例清晰讲解说明,利于理解】