python 多种方法实现数组统计

example input for index_from_tokens
[("cat",1),("cat",2),("cat",2),("door",1),("water",3)]
example output for index_from_tokens
index:{'cat':[(1,2)],'door':[(1,1)],'water':[(3,1)]}
doc_freq:{'cat':2,'door':1,'water':1}


def function(index_from_tokens: list):
    """Build an index and per-key frequencies from (key, value) pairs.

    Args:
        index_from_tokens: Sequence of ``(key, value)`` pairs, e.g.
            ``("cat", 1)``. The same pair may appear several times.

    Returns:
        A tuple ``(index, doc_freq)``. ``index`` maps each key to a list of
        ``(value, occurrences)`` tuples in first-seen order; ``doc_freq``
        maps each key to the number of distinct values seen with it.
    """
    # Count every (key, value) pair using the tuple itself as the dict key.
    # Joining the two into a "key_value" string and splitting it back on the
    # first "_" breaks for keys that contain an underscore and forces the
    # value through a str()/int() round trip.
    pair_counts = {}
    for key, value in index_from_tokens:
        pair = (key, value)
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

    index = {}
    doc_freq = {}
    for (key, value), num in pair_counts.items():
        index.setdefault(key, []).append((value, num))
        # Each distinct pair contributes exactly one to the key's frequency.
        doc_freq[key] = doc_freq.get(key, 0) + 1
    return index, doc_freq


# Demo: run the function on a small sample and print both results.
index_from_tokens = [
    ("cat", 1),
    ("cat", 1),
    ("cat", 2),
    ("door", 1),
    ("water", 3),
]
index, doc_freq = function(index_from_tokens)
print(index, doc_freq, sep="\n")

img

方法2:


def function(index_from_tokens: list):
    """Build an index and per-key frequencies from (key, value) pairs.

    Values are first grouped per key, then counted per group.

    Args:
        index_from_tokens: Sequence of ``(key, value)`` pairs, e.g.
            ``("cat", 1)``. The same pair may appear several times.

    Returns:
        A tuple ``(index, doc_freq)``. ``index`` maps each key to a list of
        ``(value, occurrences)`` tuples in first-seen order; ``doc_freq``
        maps each key to the number of distinct values seen with it.
    """
    from collections import Counter

    # Group the values under their key, keeping duplicates.
    grouped = {}
    for key, value in index_from_tokens:
        grouped.setdefault(key, []).append(value)

    index = {}
    doc_freq = {}
    for key, values in grouped.items():
        # Counter tallies in one pass and keeps first-seen order, so the
        # postings no longer depend on set iteration order and we avoid a
        # list.count() scan for every distinct value.
        counts = Counter(values)
        index[key] = list(counts.items())
        doc_freq[key] = len(counts)
    return index, doc_freq


# Demo: run the function on a small sample and print both results.
index_from_tokens = [
    ("cat", 1),
    ("cat", 1),
    ("cat", 2),
    ("door", 1),
    ("water", 3),
]
index, doc_freq = function(index_from_tokens)
print(index, doc_freq, sep="\n")

img
方法3


def function(index_from_tokens: list):
    """Build an index and per-key frequencies from (key, value) pairs.

    Counts are accumulated in nested dictionaries in a single pass and then
    flattened into lists of tuples.

    Args:
        index_from_tokens: List of ``(key, value)`` pairs, e.g. ``("cat", 1)``.

    Returns:
        A tuple ``(index, doc_freq)``. ``index`` maps each key to a list of
        ``(value, occurrences)`` tuples in first-seen order; ``doc_freq``
        maps each key to the number of distinct values seen with it.
    """
    nested_counts = {}
    doc_freq = {}
    for token, item in index_from_tokens:
        per_token = nested_counts.setdefault(token, {})
        if item in per_token:
            per_token[item] += 1
        else:
            # First time this value shows up for the token: start its count
            # and bump the token's distinct-value tally.
            per_token[item] = 1
            doc_freq[token] = doc_freq.get(token, 0) + 1

    # Flatten each inner {value: count} mapping into a list of tuples.
    index = {
        token: list(per_token.items())
        for token, per_token in nested_counts.items()
    }
    return index, doc_freq


# Demo: run the function on a small sample and print both results.
index_from_tokens = [
    ("cat", 1),
    ("cat", 1),
    ("cat", 2),
    ("door", 1),
    ("water", 3),
]
index, doc_freq = function(index_from_tokens)
print(index, doc_freq, sep="\n")

img

题目没表述清楚吧。。。

example input for index_from_tokens
[("cat",1),("cat",1),("cat",2),("door",1),("water",3)]

输入
[("cat",1),("cat",1),("cat",2),("door",1),("water",3)]

输出结果
index:{'cat':[(1,2)],'door':[(1,1)],'water':[(3,1)]}
doc_freq:{'cat':2,'door':1,'water':1}