如何解决XLnet分类存在的问题?

使用XLnet进行文本分类,出现了下面的问题:

def get_inputs(content, tokenizer, max_len=120):
    """Tokenize a list of texts and return model-ready arrays.

    Args:
        content: iterable of raw text strings.
        tokenizer: a HuggingFace tokenizer exposing ``encode_plus``
            (must not be None — see the AttributeError below).
        max_len: maximum token length passed to the tokenizer.

    Returns:
        Tuple of (raw encodings, input_ids array, attention_mask array,
        token_type_ids array).
    """
    # Bug fix: the original line referenced the undefined name ``maxlen``
    # instead of the ``max_len`` parameter, raising NameError when called.
    inps = [tokenizer.encode_plus(t, max_length=max_len, pad_to_max_length=False, add_special_tokens=True) for t in content]
    inp_tok = np.array([a['input_ids'] for a in inps])
    ids = np.array([a['attention_mask'] for a in inps])
    segments = np.array([a['token_type_ids'] for a in inps])
    return inps, inp_tok, ids, segments

def warmup(epoch, lr):
    """Learning-rate schedule that ramps the rate up gradually.

    Used for increasing the learning rate slowly, which tends to achieve
    better convergence; not crucial when finetuning for only a few epochs.

    Bug fix: the original returned ``max(lr + 1e-6, 2e-5)``, which jumps
    straight to 2e-5 on the first call and afterwards grows without bound —
    the opposite of a warmup. ``min`` increases the rate by 1e-6 per epoch
    and caps it at the target rate 2e-5.

    Args:
        epoch: current epoch index (unused; required by the Keras
            LearningRateScheduler callback signature — TODO confirm caller).
        lr: current learning rate.

    Returns:
        The learning rate for the next epoch.
    """
    return min(lr + 1e-6, 2e-5)

def plot_metrics(pred, true_labels):
    """Draw the ROC curve for *pred* against *true_labels*.

    The figure title reports the ROC AUC and the accuracy obtained by
    thresholding the predicted scores at 0.5.

    Returns:
        The matplotlib figure containing the plot.
    """
    # Hard labels from scores: positive when the score reaches 0.5.
    hard = np.array(pred.flatten() >= .5, dtype='int')
    acc = accuracy_score(true_labels, hard)
    auc = roc_auc_score(true_labels, pred)
    fpr, tpr, _ = roc_curve(true_labels, pred)

    figure, axis = plt.subplots(1, figsize=(8,8))
    axis.plot(fpr, tpr, color='red')                          # ROC curve
    axis.plot([0,1], [0,1], color='black', linestyle='--')    # chance diagonal
    axis.set_title(f"AUC: {auc}\nACC: {acc}")
    return figure

inps, inp_tok, ids, segments = get_inputs(x_train_text, xlnet_tokenizer)
AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_18279/2457827602.py in <module>
----> 1 inps, inp_tok, ids, segments = get_inputs(x_train_text, xlnet_tokenizer)

/tmp/ipykernel_18279/3374919276.py in get_inputs(content, tokenizer, max_len)
      1 def get_inputs(content, tokenizer, max_len=120):
      2     """ Gets tensors from text using the tokenizer provided"""
----> 3     inps = [tokenizer.encode_plus(t, max_length=max_len, pad_to_max_length=False, add_special_tokens=True) for t in content]
      4     inp_tok = np.array([a['input_ids'] for a in inps])
      5     ids = np.array([a['attention_mask'] for a in inps])

/tmp/ipykernel_18279/3374919276.py in <listcomp>(.0)
      1 def get_inputs(content, tokenizer, max_len=120):
      2     """ Gets tensors from text using the tokenizer provided"""
----> 3     inps = [tokenizer.encode_plus(t, max_length=max_len, pad_to_max_length=False, add_special_tokens=True) for t in content]
      4     inp_tok = np.array([a['input_ids'] for a in inps])
      5     ids = np.array([a['attention_mask'] for a in inps])

AttributeError: 'NoneType' object has no attribute 'encode_plus'

报错 `'NoneType' object has no attribute 'encode_plus'` 说明传入的 tokenizer 参数是 None。请检查 xlnet_tokenizer 的初始化代码(例如 `XLNetTokenizer.from_pretrained(...)` 是否执行成功、是否真的给 xlnet_tokenizer 赋了值)。

另外也检查一下传入的文本数据 x_train_text 是否为空。