with open("中文test.txt", 'r', encoding='utf-8')as f2: # 取得文本字符串
data2 = f2.read()
# print(data2)
rate = {}
for i in range(len(data2)): # 循环取得各字符出现频率
if data2[i] not in rate.keys():
rate[data2[i]] = 1
else:
rate[data2[i]] += 1
# Srate=sorted(rate.items(),key=lambda d:d[1],reverse=False)#对字典按值进行排序True降False升,sorted返回列表而非字典
# print(Srate)
class Node(object):
def __init__(self, value=None, left=None, right=None, data=None): # 默认构造函数,定义左右节点以及父节点和权值
self.value = value
self.left = left
self.right = right
self.data = data
noderate = {}
for x in rate.keys(): # 将字典转为频率结点字典
noderate[x] = Node(rate[x])
noderate[x].data=x
nodes_list = []
for x in noderate.keys(): # 将结点字典加入列表
nodes_list.append(noderate[x])
#print(nodes_list[0].data)
#print(nodes_list)
#print(noderate.keys())
def creatTree(node_list): # 递归构建哈夫曼树
node_list.sort(key=lambda x: x.value) # 对结点列表进行排序
if len(node_list) == 1:
return node_list[0] # 返回根节点
father_node = Node(node_list[0].value + node_list[1].value, node_list[0], node_list[1])
# father_node.left=node_list[0]
# father_node.right=node_list[1]
node_list.pop(0)
node_list.pop(0)
node_list.insert(0, father_node)
return creatTree(node_list)
————————————————————————————————————————————————————————————————————————
def getEncode(node,code,dic):
if node.left==None and node.right==None:
dic[node.data]=code
print(node.data)
return dic
if node.left!=None:
getEncode(node.left, code+'0',dic)
else:# node.right:
getEncode(node.right, code+'1',dic)
——————————————————————————————————————————————————————————————————-————-
dic={}
cout=0
node=creatTree(nodes_list)
getEncode(node,'',dic)
print(dic)
题主思维有盲区:左叉不为空,就不会执行54行else语句,自然也就不会输出右叉了。将54行的else改为52行的样子,left换成right,就OK了。
另外,题主的代码写得有点凌乱,比如,统计字符频率可以这样写:
rate = {}
for ch in data2: # 循环取得各字符出现频率
rate[ch] = rate.get(ch, 0) + 1
从22行到29行,可与这样写:
noderate, nodes_list = {}, []
for x in rate:
noderate[x] = Node(rate[x])
noderate[x].data=x
nodes_list.append(noderate[x])