from torchtext import data

# Simple whitespace tokenizer
mytokenize = lambda x: x.split()

# Field for the text column: tokenized, with a vocabulary,
# batch dimension first, padded/truncated to 200 tokens
TEXT = data.Field(
    sequential=True,
    tokenize=mytokenize,
    use_vocab=True,
    batch_first=True,
    fix_length=200
)

# Field for the label column: a single value, no vocabulary, no padding
LABEL = data.Field(
    sequential=False,
    use_vocab=False,
    pad_token=None,
    unk_token=None
)

# Map the CSV columns to their Fields (order must match the file)
text_data_fields = [
    ("label", LABEL),
    ("text", TEXT)
]

# Load the training and test splits from CSV files
traindata, testdata = data.TabularDataset.splits(
    path="D:/textdata", format="csv",
    train="train.csv", fields=text_data_fields,
    test="test.csv", skip_header=True
)

# Number of samples in each split
len(traindata), len(testdata)
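
As a minimal sketch of the usual follow-up steps (assuming the legacy torchtext Field/Iterator API used above; the max_size=10000 and batch_size=32 values are illustrative, not from the original), you would build the TEXT vocabulary from the training split and then wrap both splits in BucketIterators:

# Build the vocabulary for the text Field from the training data only
TEXT.build_vocab(traindata, max_size=10000)

# Wrap both splits in iterators that batch and pad the examples
train_iter, test_iter = data.BucketIterator.splits(
    (traindata, testdata),
    batch_size=32,
    sort_key=lambda x: len(x.text)
)

for batch in train_iter:
    print(batch.text.shape)   # [32, 200] because batch_first=True and fix_length=200
    print(batch.label.shape)  # [32]
    break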
See 👉: Using TorchText to Process Our Own Dataset