example input for index_from_tokens
[("cat",1),("cat",2),("cat",2),("door",1),("water",3)]
example output for index_from_tokens
index:{'cat':[(1,2)],'door':[(1,1)],'water':[(3,1)]}
doc_freq:{'cat':2,'door':1,'water':1}
def function(index_from_tokens: list):
    """Build an inverted index and document frequencies from token pairs.

    Args:
        index_from_tokens: Sequence of ``(term, doc_id)`` pairs, one entry
            per token occurrence.

    Returns:
        A ``(index, doc_freq)`` tuple of plain dicts. ``index`` maps each
        term to a list of ``(doc_id, occurrences)`` postings, ordered by the
        first appearance of each ``(term, doc_id)`` pair. ``doc_freq`` maps
        each term to the number of distinct doc ids it appears with.
    """
    from collections import Counter

    # Count every (term, doc_id) pair with the tuple itself as the key.
    # Joining the two into a "term_docid" string and splitting it again at
    # the first "_" mis-parses any term that contains an underscore.
    pair_counts = Counter(
        (term, doc_id) for term, doc_id in index_from_tokens
    )

    index = {}
    doc_freq = {}
    for (term, doc_id), occurrences in pair_counts.items():
        index.setdefault(term, []).append((doc_id, occurrences))
        # Each distinct pair contributes exactly one document for the term.
        doc_freq[term] = doc_freq.get(term, 0) + 1
    return index, doc_freq
# Demo run: index the sample tokens and print both resulting mappings.
index_from_tokens = [
    ("cat", 1),
    ("cat", 1),
    ("cat", 2),
    ("door", 1),
    ("water", 3),
]
index, doc_freq = function(index_from_tokens)
# One call, newline-separated: same output as printing each mapping in turn.
print(index, doc_freq, sep="\n")
方法2:
def function(index_from_tokens: list):
    """Build an inverted index and document frequencies from token pairs.

    Args:
        index_from_tokens: Sequence of ``(term, doc_id)`` pairs, one entry
            per token occurrence.

    Returns:
        A ``(index, doc_freq)`` tuple of plain dicts. ``index`` maps each
        term to a list of ``(doc_id, occurrences)`` postings, ordered by the
        first appearance of each doc id for that term. ``doc_freq`` maps
        each term to the number of distinct doc ids it appears with.
    """
    from collections import Counter

    # Group by term and count doc ids in a single pass. A Counter keeps
    # first-insertion order, so postings come out deterministically instead
    # of following set iteration order, and no per-doc list.count() rescan
    # is needed.
    doc_counts_by_term = {}
    for term, doc_id in index_from_tokens:
        doc_counts_by_term.setdefault(term, Counter())[doc_id] += 1

    index = {
        term: list(doc_counts.items())
        for term, doc_counts in doc_counts_by_term.items()
    }
    # The number of distinct doc ids per term is the document frequency.
    doc_freq = {
        term: len(doc_counts)
        for term, doc_counts in doc_counts_by_term.items()
    }
    return index, doc_freq
# Demo run: index the sample tokens and print both resulting mappings.
index_from_tokens = [
    ("cat", 1),
    ("cat", 1),
    ("cat", 2),
    ("door", 1),
    ("water", 3),
]
index, doc_freq = function(index_from_tokens)
# One call, newline-separated: same output as printing each mapping in turn.
print(index, doc_freq, sep="\n")
方法3
def function(index_from_tokens: list):
    """Build an inverted index and document frequencies from token pairs.

    Args:
        index_from_tokens: Sequence of ``(term, doc_id)`` pairs, one entry
            per token occurrence.

    Returns:
        A ``(index, doc_freq)`` tuple of plain dicts. ``index`` maps each
        term to a list of ``(doc_id, occurrences)`` postings, ordered by the
        first appearance of each doc id for that term. ``doc_freq`` maps
        each term to the number of distinct doc ids it appears with.
    """
    # term -> {doc_id: occurrences}; kept separate from the final index so
    # no mapping changes shape while it is being iterated.
    counts_by_term = {}
    for term, doc_id in index_from_tokens:
        per_doc = counts_by_term.setdefault(term, {})
        per_doc[doc_id] = per_doc.get(doc_id, 0) + 1

    index = {
        term: list(per_doc.items())
        for term, per_doc in counts_by_term.items()
    }
    # Derive document frequency from the number of postings, so it counts
    # distinct documents rather than individual tokens.
    doc_freq = {
        term: len(per_doc)
        for term, per_doc in counts_by_term.items()
    }
    return index, doc_freq
# Demo run: index the sample tokens and print both resulting mappings.
index_from_tokens = [
    ("cat", 1),
    ("cat", 1),
    ("cat", 2),
    ("door", 1),
    ("water", 3),
]
index, doc_freq = function(index_from_tokens)
# One call, newline-separated: same output as printing each mapping in turn.
print(index, doc_freq, sep="\n")
题目没表述清楚吧。。。
example input for index_from_tokens
[("cat",1),("cat",1),("cat",2),("door",1),("water",3)]
输入
[("cat",1),("cat",1),("cat",2),("door",1),("water",3)]
输出结果
index:{'cat':[(1,2)],'door':[(1,1)],'water':[(3,1)]}
doc_freq:{'cat':2,'door':1,'water':1}