存放英文单词并统计次数,用什么数据结构比较合理

首先定义一个结构体,存放的是char word[20] 单词的名称, int count 单词出现次数

一个程序按行读取文本文件,每次读到一个英语单词,就把该单词的count加一。最后输出文本中所有的单词。这里考虑用结构体数组(顺序线性表存储是一种最简单的方案),请问有没有更合理的方案?

Trie树也称为字典树,比较适合应用于楼主提出的场景。根据Trie树的特点可以看出优势主要有:

  • 节省存储空间,相同前缀的单词重复字段只会保留一份
  • 查找快,查找一个单词的时间只与单词长度L成正比(O(L)),与单词总数N无关。但要输出所有单词时需要把整棵树遍历一遍,还是比较浪费时间的。

如果楼主不在意空间因素,那直接hash也是可行的。

可以用trie树实现。

 #include <stdio.h>  
#include <stdlib.h>  
#include <memory.h>  

/*
 * One node of a trie whose edges are indexed by (letter - 'a').
 * The root node represents the empty prefix.
 */
struct Trie_node {
    /* Number of inserted words that end exactly at this node. */
    int count;
    /* Child links, one slot per letter offset; NULL means no child. */
    struct Trie_node *next[26];
};

typedef struct Trie_node TrieNode;   /* node type */
typedef struct Trie_node *Trie;      /* handle to a trie: pointer to its root node */

/**
 * Allocate a fresh trie node with a zero count and no children.
 *
 * @return pointer to the new node, or NULL if memory allocation fails.
 *         The caller owns the returned node.
 */
TrieNode* createTrieNode(void){
    TrieNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }

    node->count = 0;
    /* Assign NULL slot by slot rather than byte-filling the array. */
    for (size_t i = 0; i < sizeof node->next / sizeof node->next[0]; i++) {
        node->next[i] = NULL;
    }
    return node;
}

/**
 * Insert one occurrence of `word` into the trie rooted at `root`.
 *
 * Only words made entirely of the letters 'a'..'z' can be stored, because
 * each character is mapped to a child slot via (ch - 'a'). A word containing
 * any other character is ignored as a whole, so the trie is never modified
 * by a rejected word. An empty word increments the count on the root node.
 *
 * @param root  root node of the trie; nothing happens if NULL.
 * @param word  NUL-terminated word; nothing happens if NULL.
 *
 * If a node allocation fails part-way, the insertion is abandoned; nodes
 * already created stay in the trie with a zero count.
 */
void trie_insert(Trie root, char* word){
    if (root == NULL || word == NULL) {
        return;
    }

    const int slots = (int)(sizeof root->next / sizeof root->next[0]);

    /* Validate before touching the trie so a bad word leaves no partial path. */
    for (const char *c = word; *c != '\0'; c++) {
        int slot = *c - 'a';
        if (slot < 0 || slot >= slots) {
            return;
        }
    }

    TrieNode *node = root;
    for (const char *c = word; *c != '\0'; c++) {
        int slot = *c - 'a';
        if (node->next[slot] == NULL) {
            node->next[slot] = createTrieNode();
            if (node->next[slot] == NULL) {
                return; /* allocation failed: give up on this word */
            }
        }
        node = node->next[slot];
    }
    node->count += 1;
}

/**
 * Test whether `word` has been inserted into the trie at least once.
 *
 * @param root  root node of the trie (may be NULL, meaning an empty trie).
 * @param word  NUL-terminated word to look up.
 * @return 1 if a path for `word` exists and its final node has count > 0,
 *         otherwise 0. A NULL word, or a word containing a character
 *         outside 'a'..'z', is reported as absent.
 */
int trie_search(Trie root, char* word){
    if (word == NULL) {
        return 0;
    }

    const TrieNode *node = root;
    for (const char *c = word; *c != '\0' && node != NULL; c++) {
        int slot = *c - 'a';
        /* Characters outside the child table cannot be stored in the trie. */
        if (slot < 0 || slot >= (int)(sizeof node->next / sizeof node->next[0])) {
            return 0;
        }
        node = node->next[slot];
    }
    return (node != NULL && node->count > 0);
}

/**
 * Return how many times `word` has been inserted into the trie.
 *
 * @param root  root node of the trie (may be NULL, meaning an empty trie).
 * @param word  NUL-terminated word to look up.
 * @return the count stored on the node where `word` ends, or 0 when the
 *         word is not present, is NULL, or contains a character outside
 *         'a'..'z'.
 */
int trie_word_count(Trie root, char* word){
    if (word == NULL) {
        return 0;
    }

    const TrieNode *node = root;
    for (const char *c = word; *c != '\0' && node != NULL; c++) {
        int slot = *c - 'a';
        /* Characters outside the child table cannot be stored in the trie. */
        if (slot < 0 || slot >= (int)(sizeof node->next / sizeof node->next[0])) {
            return 0;
        }
        node = node->next[slot];
    }

    /* node is NULL when the path for `word` does not exist. */
    return (node != NULL) ? node->count : 0;
}


int main(){  
    Trie t = createTrieNode();  
    char word[][10] = {"test","study","open","show","shit","work","work","test","tea","word","area","word","test","test","test"};  
    for(int i = 0;i < 15;i++ ){  
        trie_insert(t,word[i]);  
    }  
    for(int i = 0;i < 15;i++ ){  
        printf("the word %s appears %d times in the trie-tree\n",word[i],trie_word_count(t,word[i]));  
    }  
    char s[10] = "testit";  
    printf("the word %s exist? %d \n",s,trie_search(t,s));  
    return 0;  
}