Python贝叶斯分类问题

问题遇到的现象和发生背景

补充代码:初始化高斯朴素贝叶斯模型(GaussianNB),训练模型,并测试其在邮件分类上的准确率。调节模型参数,使邮件分类在测试集上的准确率不低于70%。

用代码块功能插入代码,请勿粘贴截图
import os
import numpy as np
from collections import Counter
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

def make_Dictionary(root_dir):
    """Count the words found in every file directly under ``root_dir``.

    Each file is read line by line and split on whitespace. Tokens that are
    not purely alphabetic, and tokens that are a single character long, are
    removed from the result.

    Args:
        root_dir: Directory whose entries are all opened as text files.

    Returns:
        A ``Counter`` mapping each remaining word to its total number of
        occurrences across all files.
    """
    word_counts = Counter()
    for name in os.listdir(root_dir):
        with open(os.path.join(root_dir, name)) as handle:
            for text_line in handle:
                word_counts.update(text_line.split())

    # Decide what to drop from a snapshot of the keys so the counter is never
    # resized while it is being iterated.
    rejected = [w for w in word_counts if not w.isalpha() or len(w) == 1]
    for w in rejected:
        del word_counts[w]
    return word_counts

def extract_features(mail_dir, dictionary):
    """Build a word-count feature matrix and spam labels for a mail directory.

    Only the third line of each file (line index 2) contributes features.
    Row ``r`` describes the ``r``-th path built from ``os.listdir(mail_dir)``
    and column ``c`` corresponds to the ``c``-th entry obtained by iterating
    ``dictionary``.

    Args:
        mail_dir: Directory whose entries are all opened as text files.
        dictionary: Sized iterable of vocabulary entries. Each entry may be a
            plain word (what iterating a ``Counter`` yields) or a
            ``(word, count)`` pair (what ``Counter.most_common`` returns).

    Returns:
        A ``(features_matrix, train_labels)`` tuple. ``features_matrix`` has
        shape ``(n_files, len(dictionary))`` and holds, for every vocabulary
        word, how often it occurs on the file's third line.
        ``train_labels`` has shape ``(n_files,)`` and is 1 for files whose
        name contains ``"spmsg"`` and 0 otherwise.
    """
    files = [os.path.join(mail_dir, name) for name in os.listdir(mail_dir)]
    features_matrix = np.zeros((len(files), len(dictionary)))
    train_labels = np.zeros(len(files))

    # Resolve every vocabulary word to its column once. Testing
    # ``entry[0] == word`` is only correct for (word, count) pairs; on a plain
    # string entry it compares just the first character, so real words never
    # match and single-letter tokens hit unrelated columns.
    column_of = {}
    for column, entry in enumerate(dictionary):
        vocab_word = entry if isinstance(entry, str) else entry[0]
        column_of[vocab_word] = column

    for doc_id, path in enumerate(files):
        with open(path) as handle:
            for line_no, line in enumerate(handle):
                if line_no == 2:
                    # Count each distinct token once instead of calling
                    # list.count for every occurrence.
                    for word, occurrences in Counter(line.split()).items():
                        column = column_of.get(word)
                        if column is not None:
                            features_matrix[doc_id, column] = occurrences
                    break  # no later line is used

        # Label from the file name alone so that a directory component that
        # happens to contain "spmsg" cannot mark every mail as spam.
        if "spmsg" in os.path.basename(path):
            train_labels[doc_id] = 1
    return features_matrix, train_labels

def test(var_smoothing=1e-9):
    """Train a Gaussian naive Bayes mail classifier and return its accuracy.

    The vocabulary is built from the training directory, both directories
    are converted to feature matrices with ``extract_features``, and a
    ``GaussianNB`` model fitted on the training data is scored on the test
    data.

    Args:
        var_smoothing: Passed through to ``GaussianNB``; this is the model
            parameter to adjust when tuning. The default matches
            scikit-learn's own default.

    Returns:
        Classification accuracy on the test directory as a percentage
        (0-100).
    """
    TRAIN_DIR = "src/step1/train-mails"
    TEST_DIR = "src/step1/test-mails"
    dictionary = make_Dictionary(TRAIN_DIR)
    X_train, y_train = extract_features(TRAIN_DIR,dictionary)
    X_test, y_test = extract_features(TEST_DIR,dictionary)

    # Task: initialise the naive Bayes model, train it, and measure its
    # classification performance. Tune the model parameters so that mail
    # classification accuracy is at least 70%, and return it as a percentage.
    ########## Begin ##########
    model = GaussianNB(var_smoothing=var_smoothing)
    model.fit(X_train, y_train)

    # Classifier predictions on the test set
    y_pred = model.predict(X_test)

    # accuracy_score yields a fraction in [0, 1]; scale it to a percentage
    accuracy = accuracy_score(y_test, y_pred) * 100
    ########## End ##########
    return accuracy

运行结果及报错内容

修改了很多遍,正确率始终只有60%多,达不到要求的70%。

我想要达到的结果

分类精度为:74.82758620689656%

参考这篇博文:https://blog.csdn.net/qq_42589613/article/details/127648820