python程序运行报错


from numpy import *


def loadDateSet():
    postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
                   ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
                   ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
                   ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
                   ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
                   ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
    classVec = [0, 1, 0, 1, 0, 1]
    return postingList, classVec


def createVocabList(dataSet):
    vocabSet = set([])
    for document in dataSet:
        vocabSet = vocabSet | set(document)
    return list(vocabSet)


def setOfWords2Vec(vocabList, inputSet):
    returnVec = [0] * len(vocabList)
    for word in inputSet:
        returnVec[vocabList.index(word)] = 1
    else:
        print("the word: %s is not in my vocabulary!" % word)
    return returnVec


def trainNBO(trainMatrix, trainCategory):
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    p0Num = zeros(numWords);
    p1Num = zeros(numWords)
    p0Demon = 0.0;
    p1Demon = 0.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]
            p1Demon += sum(trainMatrix[i])
        else:
            p0Num += trainMatrix[i]
            p0Demon += sum(trainMatrix[i])
        p1Vect = log(p1Num * p1Demon)
        p0Vect = log(p0Num * p0Demon)
    return p0Vect, p1Vect, pAbusive


def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    p1 = sum(vec2Classify * p1Vec) + log(pClass1)
    p0 = sum(vec2Classify * p0Vec) + log(1.0 - pClass1)
    if p1 > p0:
        return 1
    else:
        return 0


def testingNB():
    listOPosts, listClasses = loadDateSet()
    myVocabList = createVocabList(listOPosts)
    trainMat = []
    for postinDoc in listOPosts:
        trainMat.append(setOfWords2Vec(myVocabList, postinDoc))
        p0V, p1V, pAb = trainNBO(trainMat, listClasses)
        test = ['love', 'my', 'dalmation']
        this = setOfWords2Vec(myVocabList, test)
        print(test, 'classified as:', classifyNB(this, pAb, p1V, p0V))
        test = ['stupid', 'garbage']
        this = setOfWords2Vec(myVocabList, test)
        print(test, 'classified as:', classifyNB(this, pAb, p1V, p0V))

修改代码

p1Vect = log(p1Num/p1Demon) 
p0Vect = log(p0Num/p0Demon)

部分回答整理自GPT:(我已经验证过了,可以正常运行)

img

在您提供的代码中,有几个地方存在错误:

  1. trainNBO 函数中,计算 p1Vectp0Vect 的方式不正确。应该将它们放在循环之外,以便在计算完所有的词向量和词频后再进行计算。另外,计算概率时应使用除法而不是乘法。

  2. classifyNB 函数中,计算 p1p0 的方式也不正确。应该将两个向量的元素相乘后再求和,而不是直接使用乘法运算符 *

下面是修正后的代码:

from numpy import *


def loadDataSet():
    postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
                   ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
                   ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
                   ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
                   ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
                   ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
    classVec = [0, 1, 0, 1, 0, 1]
    return postingList, classVec


def createVocabList(dataSet):
    vocabSet = set([])
    for document in dataSet:
        vocabSet = vocabSet | set(document)
    return list(vocabSet)


def setOfWords2Vec(vocabList, inputSet):
    returnVec = [0] * len(vocabList)
    for word in inputSet:
        if word in vocabList:
            returnVec[vocabList.index(word)] = 1
        else:
            print("the word: %s is not in my vocabulary!" % word)
    return returnVec


def trainNBO(trainMatrix, trainCategory):
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    p0Num = ones(numWords)
    p1Num = ones(numWords)
    p0Denom = 2.0
    p1Denom = 2.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    p1Vect = log(p1Num / p1Denom)
    p0Vect = log(p0Num / p0Denom)
    return p0Vect, p1Vect, pAbusive


def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    p1 = sum(vec2Classify * p1Vec) + log(pClass1)
    p0 = sum(vec2Classify * p0Vec) + log(1.0 - pClass1)
    if p1 > p0:
        return 1
    else:
        return 0


def testingNB():
    listOPosts, listClasses = loadDataSet()
    myVocabList = createVocabList(listOPosts)
    trainMat = []
    for postinDoc in listOPosts:
        trainMat.append(setOfWords2Vec(myVocabList, postinDoc))
    p0V, p1V, pAb = trainNBO(trainMat, listClasses)
    testEntry = ['love', 'my', 'dalmation']
    thisDoc = array(setOfWords2Vec(myVocabList, testEntry))
    print(testEntry, 'classified as:', classifyNB(thisDoc, p0V, p1V, pAb))
    testEntry = ['stupid', 'garbage']
    thisDoc = array(setOfWords2Vec(myVocabList, testEntry))
    print(testEntry, 'classified as:', classifyNB(thisDoc, p0V, p1V, pAb))


testingNB()

最后添加了一个调用testingNB方法,以便可以运行。

错误截图放一下,还有你代码里没有main方法啊

  • 这有个类似的问题, 你可以参考下: https://ask.csdn.net/questions/7743033
  • 你也可以参考下这篇文章:用python进行模拟
  • 除此之外, 这篇博客: Python 汇总中的 成员运算符 部分也许能够解决你的问题, 你可以仔细阅读以下内容或跳转源博客中阅读:
  • 运算符描述
    in如果在指定的序列中找到值返回 True,否则返回 False
    not in如果在指定的序列中没有找到值返回 True,否则返回 False
    """
     @dauthor : cpucode
     @date : 2022/3/1 9:06
     @github : https://github.com/CPU-Code
     @csdn : https://blog.csdn.net/qq_44226094
    """
    
    a = 22
    b = 11
    list = [11, 22, 33, 44, 55]
    
    if (a in list):
        print("a 在列表 list 中")
    else:
        print("a 不在列表 list 中")
    
    
    if(b not in list):
        print("b 不在列表 list 中")
    else:
        print("b 在列表 list 中")
    
    print("/*****************************************/")
    
    # 修改变量 a 的值
    a = 2
    if (a in list):
        print("a 在列表 list 中")
    else:
        print("a 不在列表 list 中")
    

    在这里插入图片描述

  • 您还可以看一下 唐宇迪老师的Python数据挖掘实战课程中的 行为特征小节, 巩固相关知识点

报什么错,运行过后输出打印都没有