I'm a complete beginner. I spent a whole afternoon on this without solving it, asking ChatGPT didn't help either, and I really can't figure it out, so I'm asking here: why do I keep getting the error below? (This is from a PyTorch-based BERT text classification project.)
Traceback (most recent call last):
  File "E:\pycharm\pycharm projects\BERT-one\run.py", line 37, in <module>
    train(config, model, train_iter, dev_iter, test_iter)
TypeError: train() missing 1 required positional argument: 'test_iter'
A Baidu search suggested that test_iter is not defined, so I'm pasting the code that builds test_iter below:
train_data, dev_data, test_data = build_dataset(config)
train_iter = build_iterator(train_data, config)
dev_iter = build_iterator(dev_data, config)
test_iter = build_iterator(test_data, config)
Here is the build_dataset code as well:
import re
from tqdm import tqdm

CLS = '[CLS]'  # BERT's classification token, prepended to every sequence

def build_dataset(config):
    def load_dataset(path, pad_size=50):  # load and preprocess one dataset file
        contents = []
        with open(path, 'r', encoding='UTF-8') as f:
            for line in tqdm(f):
                lin = line.strip()
                if not lin:  # skip empty lines
                    continue
                if len(lin.split(' ')) == 2:
                    content, label = lin.split(' ')
                    content = re.sub(r"\d+ ", "NUM", content)  # replace digit runs with a NUM placeholder
                    content = re.sub(r"\n", '', content)  # strip newline characters
                    content = re.sub(r'[\u0000-\u0019\u0021-\u0040\u007a-\uffff]', '', content)  # remove non-space, non-letter characters
                    token = config.tokenizer.tokenize(content)
                    token = [CLS] + token
                    seq_len = len(token)
                    mask = []
                    token_ids = config.tokenizer.convert_tokens_to_ids(token)
                    pad_size = config.pad_size  # note: this overrides the pad_size argument
                    if pad_size:  # truncate long sequences, pad short ones
                        if len(token) < pad_size:
                            mask = [1] * len(token_ids) + [0] * (pad_size - len(token))
                            token_ids += ([0] * (pad_size - len(token)))
                        else:
                            mask = [1] * pad_size
                            token_ids = token_ids[:pad_size]
                            seq_len = pad_size
                    contents.append((token_ids, int(label), seq_len, mask))
        return contents

    train = load_dataset(config.train_path, config.pad_size)
    dev = load_dataset(config.dev_path, config.pad_size)
    test = load_dataset(config.test_path, config.pad_size)
    return train, dev, test
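For illustration, here is a standalone sketch of the truncate/pad-and-mask step above, with made-up token IDs (101 standing in for [CLS]); all values are invented for the example:

token_ids = [101, 2769, 3341]  # e.g. [CLS] plus two word-piece ids
pad_size = 5
if len(token_ids) < pad_size:
    # real tokens get mask 1, padding positions get mask 0
    mask = [1] * len(token_ids) + [0] * (pad_size - len(token_ids))
    token_ids += [0] * (pad_size - len(token_ids))
else:
    mask = [1] * pad_size
    token_ids = token_ids[:pad_size]
print(token_ids)  # [101, 2769, 3341, 0, 0]
print(mask)       # [1, 1, 1, 0, 0]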
And the build_iterator code:
def build_iterator(dataset, config):
    return DatasetIterater(dataset, config.batch_size, config.device)
class DatasetIterater(object):
    # wraps a preprocessed dataset and yields batches of tensors
    def __init__(self, batches, batch_size, device):
        self.batch_size = batch_size
        self.batches = batches
        self.n_batches = len(batches) // batch_size
        self.residue = False  # records whether the sample count divides evenly into batches
        if len(batches) % batch_size != 0:
            self.residue = True
        self.index = 0
        self.device = device

    def _to_tensor(self, datas):
        x = torch.LongTensor([_[0] for _ in datas]).to(self.device)
        y = torch.LongTensor([_[1] for _ in datas]).to(self.device)
        # sequence length before padding (capped at pad_size)
        seq_len = torch.LongTensor([_[2] for _ in datas]).to(self.device)
        mask = torch.LongTensor([_[3] for _ in datas]).to(self.device)
        return (x, seq_len, mask), y

    def __next__(self):
        if self.residue and self.index == self.n_batches:
            # final, smaller batch holding the leftover samples
            batches = self.batches[self.index * self.batch_size: len(self.batches)]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches
        elif self.index >= self.n_batches:
            self.index = 0
            raise StopIteration
        else:
            batches = self.batches[self.index * self.batch_size: (self.index + 1) * self.batch_size]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

    def __iter__(self):
        return self

    def __len__(self):
        if self.residue:
            return self.n_batches + 1
        else:
            return self.n_batches
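A minimal sanity check of DatasetIterater in isolation (dummy CPU data; all values are made up, and four samples with batch_size=3 exercise the leftover-batch branch):

import torch
dummy = [([1, 2, 3], 0, 3, [1, 1, 1]) for _ in range(4)]
it = DatasetIterater(dummy, batch_size=3, device=torch.device('cpu'))
print(len(it))  # 2: one full batch plus the leftover batch
for (x, seq_len, mask), y in it:
    print(x.shape, y.shape)  # torch.Size([3, 3]) torch.Size([3]), then torch.Size([1, 3]) torch.Size([1])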
Many thanks in advance for any answers! If any other information would help, I can post it. Thank you very much!
An answer based on GPT plus my own reasoning:
As the error message says, the call is indeed missing the argument 'test_iter'. Judging from the code, train() should take five parameters: config, model, train_iter, dev_iter, and test_iter, but the call that actually ran supplied one fewer argument than the definition requires. Concretely, the call should look like this:
train(config, model, train_iter, dev_iter, test_iter)
Check the definition of train() to confirm that it really takes exactly these five parameters, and that the file you are running contains the five-argument call shown above. If the problem persists, please post the full code of train() as well.
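For reference, a minimal sketch of two ways this exact TypeError can arise (all names and arguments here are illustrative):

def good_train(config, model, train_iter, dev_iter, test_iter):  # expected signature
    pass

def bad_train(self, config, model, train_iter, dev_iter, test_iter):  # stray leading parameter
    pass

good_train('cfg', 'model', 'train', 'dev', 'test')  # five arguments bind correctly
try:
    bad_train('cfg', 'model', 'train', 'dev', 'test')  # five arguments fill the first five slots
except TypeError as e:
    print(e)  # bad_train() missing 1 required positional argument: 'test_iter'

Since your traceback shows the five-argument call on line 37, it is worth checking whether the definition of train() has an extra leading parameter (for example a leftover self from a class method), or whether the name train was reassigned somewhere before the call.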