C语言,将英文文本的单词统计,并输出最高频次的十个单词,我建立了链表,但是貌似排序出了问题

void Frequency(WORD *head)

{
int i;

WORD    *pre;

WORD    *p1;

WORD    *p2;

WORD    *end = NULL;

WORD    *p = head;

while(head->next != end)                  //冒泡排序
{
    p1 = head;
    pre = head;
    p2 = p1->next;
    if(p1->count < p2->count)             //先判断前两个单词频率
        {
            pre = p2;
            p1->next = p2->next;
            p2->next = p1;
        }
        else
            p1 = p1->next;
        p2 = p1->next;
    
    while(p1->next != end)                //把最小的一个排到最后
    {
        if(p1->count < p2->count)
        {
            pre->next = p2;
            p1->next = p2->next;
            p2->next = p1;
        }
        else
            p1 = p1->next;
        p2 = p1->next;
        pre = pre->next;
    }
    end = p1;                             //一次排序后,最后一个数已经最小,end往前移
}

for(i=0; i<10; i++)                       //输出频率前十的单词 
{
    printf("%s", head->word);
    printf("\n");
    head = head->next;
}

}

20行之后应该加个head = p2,因为你改变了头结点了

望采纳

  • 首先,在第三个 while 循环中,你使用了 p1 和 p2 两个指针来遍历链表,但是并没有初始化这两个指针。因此,当你第一次进入 while 循环时,p1 和 p2 指向的位置是不确定的。

  • 其次,在第三个 while 循环中,你使用的是 p1->next != end 这个条件,而不是 p2->next != end。这意味着当 p1 到达链表的最后一个元素时,while 循环依然会继续执行,导致 p2 指向了一个不存在的元素,从而可能引发错误。

你可以将 p1 和 p2 初始化为 head,并在 while 循环中使用 p2->next != end 这个条件。

修改如下,供参考:

WORD* Frequency(WORD *head)
{
    int i;
    WORD *p1,*p2,*end = NULL,*phead = NULL;
    phead = (WORD*)malloc(sizeof(WORD)); //给链表加个表头
    phead->next = head;
    while(phead->next != end) //冒泡排序
    {
        p1 = phead;
        p2 = phead->next;
        while (p2->next != end)
        {
            if (p2->count < p2->next->count)
            {
                p1->next = p2->next;
                p2->next = p2->next->next;
                p1->next->next = p2;
                p2 = p1->next;
            }
            p2 = p2->next;
            p1 = p1->next;
        }
        end = p2;
    }
        //if(p1->count < p2->count) //先判断前两个单词频率
        //{
        //    pre = p2;
        //    p1->next = p2->next;
        //    p2->next = p1;
        //}
        //else
        //    p1 = p1->next;
        //p2 = p1->next;
        //while(p1->next != end)//把最小的一个排到最后
        //{
        //    if(p1->count < p2->count)
        //    {
        //        pre->next = p2;
        //        p1->next = p2->next;
        //        p2->next = p1;
        //    }
        //    else
        //        p1 = p1->next;
        //    p2 = p1->next;
        //    pre = pre->next;
        //}
        // end = p1;  //一次排序后,最后一个数已经最小,end往前移
        //}
    head = phead->next;
    free(phead);
    p1 = head;
    for(i=0;p1 && i<10; i++)//输出频率前十的单词
    {
        printf("%s\n", p1->score);
                           //printf("\n");
        p1 = p1->next;
    }
    return head;//返回排序后的表头
}

写了一个完整的,运行结果如下:

img

代码:

#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

struct WordNode
{
    char word[30];
    int nmb;
    struct WordNode* next;
};

class WordCount
{
public:
    WordCount() 
    { 
        head = (struct WordNode*)malloc(sizeof(struct WordNode)); 
        head->next = 0;
    }
    ~WordCount()
    {
        WordNode* node = 0;
        while (head)
        {
            node = head->next;
            free(head);
            head = node;
        }
        head = 0;
    }
    WordNode* findNode(char* p);     //查找单词
    void insertNode(WordNode* node); //插入单词
    void addCount(WordNode* node);  //单词的数量加1

    void sort_bubble();//根据词频排序-冒泡排序
    void display();   //显示
private:
    struct WordNode* head;
};

//判断是否为空
int isEmpty(char* p)
{
    while (*p)
    {
        if (*p > 0x20)
            return 0;
        p++;
    }
    return 1;
}
//查找单词
WordNode* WordCount::findNode(char* p)
{
    struct WordNode* node = head->next;
    while (node)
    {
        if (strcmp(p, node->word) == 0)
        {
            return node;
        }
        else
        {
            node = node->next;
        }
    }
    return 0;
}
//插入单词
void WordCount::insertNode(struct WordNode* node)
{
    struct WordNode* p = head;
    while (p->next)
        p = p->next;
    p->next = node;
}


//根据词频排序--冒泡排序
void WordCount::sort_bubble() 
{
    struct WordNode* p, * tail, * q;
    tail = NULL;
    while ((head->next->next) != tail)
    {
        p = head;
        q = head->next;
        while (q->next != tail)
        {
            if (q->nmb < q->next->nmb) //降序
            {
                p->next = q->next;
                q->next = q->next->next;
                p->next->next = q;
                q = p->next;
            }
            q = q->next;
            p = p->next;
        }
        tail = q;
    }
}



//单词的数量加1
void WordCount::addCount(struct WordNode* node)
{
    node->nmb += 1;
}

//显示
void WordCount::display()
{
    struct WordNode* node = head->next;
    while (node)
    {
        printf("%s\t%d\n", node->word, node->nmb);
        node = node->next;
    }
}
bool isctrl(char ch)
{
    if (ch < 0x1F || ch == 0x7F)
    {
        return true;
    }
    return false;
}
//删除空格,并将大写转变成小写
void toLower(char tt[])
{
    int i = 0, j = 0;
    while (tt[i] != '\0')
    {
        if ((tt[i] != ' ') && (!isctrl(tt[i])))
        {
            if (tt[i] >= 'A' && tt[i] <= 'Z')
                tt[j++] = tt[i] + 32;
            else
                tt[j++] = tt[i];
        }
        i++;
    }
}
//buf是存储文件的缓冲区,lSize是文件大小
char* textFileRead(const char* filename, int* lSize)
{
    char* buf;
    FILE* pf = fopen(filename, "r");
    fseek(pf, 0, SEEK_END);
    *lSize = ftell(pf);
    // 用完后需要将内存free掉
    rewind(pf);
    buf = new char[*lSize + 1];
    *lSize = fread(buf, sizeof(char), *lSize, pf);
    buf[*lSize] = '\0';
    return buf;
}

int main()
{
    int size, i = 0, j = 0;
    char text[50] = { 0 };
    WordCount cc;
    struct WordNode* node = 0;
    char* buf = textFileRead("textfile.txt", &size);

    if (size <= 0)
    {
        printf("文件打开失败,或者为空\n");
        return 0;
    }

    while (i <= size)
    {
        if (i == size)
        {
            text[j] = '\0';
            toLower(text);
            if (isEmpty(text))
            {
                j = 0;
                i++;
                continue;
            }
            node = 0;
            node = cc.findNode(text);
            if (node)
            {
                cc.addCount(node);
            }
            else
            {
                node = (struct WordNode*)malloc(sizeof(WordNode));
                strcpy(node->word, text);
                node->nmb = 1;
                node->next = 0;
                cc.insertNode(node);
            }
            break;
        }
        else
        {
            //此处不考虑中间含有.的单词,入地名,缩写等等
            if (buf[i] == ' ' || buf[i] == '\r' || buf[i] == ',' || buf[i]=='?' || buf[i] == '.' || buf[i] == '\n' || buf[i] == '!' || isctrl(buf[i]) || buf[i] == ';')
            {
                text[j] = '\0';
                toLower(text);
                if (isEmpty(text))
                {
                    j = 0;
                    i++;
                    continue;
                }
                node = 0;
                node = cc.findNode(text);
                if (node)
                {
                    //printf("find ,add count\n");
                    cc.addCount(node);
                }
                else
                {
                    //printf("not find,create\n");
                    node = (struct WordNode*)malloc(sizeof(WordNode));//new WordNode;
                    strcpy(node->word, text);
                    node->nmb = 1;
                    node->next = 0;
                    cc.insertNode(node);
                }
                j = 0;
            }
            else
            {
                text[j++] = buf[i];
            }
            i++;
        }
    }

    cc.sort_bubble();
    cc.display();
    return 0;
}





链表的

#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
struct WordNode
{
    char word[30];
    int nmb;
    struct WordNode* next;
};
class WordCount
{
public:
    WordCount() 
    { 
        head = (struct WordNode*)malloc(sizeof(struct WordNode)); 
        head->next = 0;
    }
    ~WordCount()
    {
        WordNode* node = 0;
        while (head)
        {
            node = head->next;
            free(head);
            head = node;
        }
        head = 0;
    }
    WordNode* findNode(char* p);     //查找单词
    void insertNode(WordNode* node); //插入单词
    void addCount(WordNode* node);  //单词的数量加1
    void sort_bubble();//根据词频排序-冒泡排序
    void display();   //显示
private:
    struct WordNode* head;
};
//判断是否为空
int isEmpty(char* p)
{
    while (*p)
    {
        if (*p > 0x20)
            return 0;
        p++;
    }
    return 1;
}
//查找单词
WordNode* WordCount::findNode(char* p)
{
    struct WordNode* node = head->next;
    while (node)
    {
        if (strcmp(p, node->word) == 0)
        {
            return node;
        }
        else
        {
            node = node->next;
        }
    }
    return 0;
}
//插入单词
void WordCount::insertNode(struct WordNode* node)
{
    struct WordNode* p = head;
    while (p->next)
        p = p->next;
    p->next = node;
}
//根据词频排序--冒泡排序
void WordCount::sort_bubble() 
{
    struct WordNode* p, * tail, * q;
    tail = NULL;
    while ((head->next->next) != tail)
    {
        p = head;
        q = head->next;
        while (q->next != tail)
        {
            if (q->nmb < q->next->nmb) //降序
            {
                p->next = q->next;
                q->next = q->next->next;
                p->next->next = q;
                q = p->next;
            }
            q = q->next;
            p = p->next;
        }
        tail = q;
    }
}
//单词的数量加1
void WordCount::addCount(struct WordNode* node)
{
    node->nmb += 1;
}
//显示
void WordCount::display()
{
    struct WordNode* node = head->next;
    while (node)
    {
        printf("%s\t%d\n", node->word, node->nmb);
        node = node->next;
    }
}
bool isctrl(char ch)
{
    if (ch < 0x1F || ch == 0x7F)
    {
        return true;
    }
    return false;
}
//删除空格,并将大写转变成小写
void toLower(char tt[])
{
    int i = 0, j = 0;
    while (tt[i] != '\0')
    {
        if ((tt[i] != ' ') && (!isctrl(tt[i])))
        {
            if (tt[i] >= 'A' && tt[i] <= 'Z')
                tt[j++] = tt[i] + 32;
            else
                tt[j++] = tt[i];
        }
        i++;
    }
}
//buf是存储文件的缓冲区,lSize是文件大小
char* textFileRead(const char* filename, int* lSize)
{
    char* buf;
    FILE* pf = fopen(filename, "r");
    fseek(pf, 0, SEEK_END);
    *lSize = ftell(pf);
    // 用完后需要将内存free掉
    rewind(pf);
    buf = new char[*lSize + 1];
    *lSize = fread(buf, sizeof(char), *lSize, pf);
    buf[*lSize] = '\0';
    return buf;
}
int main()
{
    int size, i = 0, j = 0;
    char text[50] = { 0 };
    WordCount cc;
    struct WordNode* node = 0;
    char* buf = textFileRead("textfile.txt", &size);
    if (size <= 0)
    {
        printf("文件打开失败,或者为空\n");
        return 0;
    }
    while (i <= size)
    {
        if (i == size)
        {
            text[j] = '\0';
            toLower(text);
            if (isEmpty(text))
            {
                j = 0;
                i++;
                continue;
            }
            node = 0;
            node = cc.findNode(text);
            if (node)
            {
                cc.addCount(node);
            }
            else
            {
                node = (struct WordNode*)malloc(sizeof(WordNode));
                strcpy(node->word, text);
                node->nmb = 1;
                node->next = 0;
                cc.insertNode(node);
            }
            break;
        }
        else
        {
            //此处不考虑中间含有.的单词,入地名,缩写等等
            if (buf[i] == ' ' || buf[i] == '\r' || buf[i] == ',' || buf[i]=='?' || buf[i] == '.' || buf[i] == '\n' || buf[i] == '!' || isctrl(buf[i]) || buf[i] == ';')
            {
                text[j] = '\0';
                toLower(text);
                if (isEmpty(text))
                {
                    j = 0;
                    i++;
                    continue;
                }
                node = 0;
                node = cc.findNode(text);
                if (node)
                {
                    //printf("find ,add count\n");
                    cc.addCount(node);
                }
                else
                {
                    //printf("not find,create\n");
                    node = (struct WordNode*)malloc(sizeof(WordNode));//new WordNode;
                    strcpy(node->word, text);
                    node->nmb = 1;
                    node->next = 0;
                    cc.insertNode(node);
                }
                j = 0;
            }
            else
            {
                text[j++] = buf[i];
            }
            i++;
        }
    }
    cc.sort_bubble();
    cc.display();
    return 0;
}



   

#include "stdafx.h"

#include "stdio.h"

#include "string.h"

 

struct word

{  

     char str[30];  //单词  

     int num;       //单词出现的次数

}A[1000];

 

int sum;    //单词的总个数

 

void chuli(char s[])

{  

      int i,j;  

      int flag=0;  //flag为零时没有重复的

      for(i=0;i<=sum;i++)  

    {   

       if(strcmp(A[i].str,s)==0)   

       {             

            A[i].num++;    

            flag=1;    

            sum++;      

         }     

     }  

      if(flag==0)  

       {   

         for(j=0;j<30;j++)       

            A[sum].str[j]=s[j];  

           A[sum].num++;   

           sum++;  

       }       

   }

  void paixu()

{    

      int i,j;    

      struct word a;  

      for(i=0;i<sum;i++)  

    {   

        for(j=i+1;j<sum;j++)      

          if(A[i].num<A[j].num)    

         {     

              a=A[j];          

             A[j]=A[i];

             A[i]=a; 

           }

      }

}

 

int main()

{  

      char ch,s[30];  

      int i,flag=0;

       FILE *fp;

       fp=fopen("d:\\a.txt","r");

      if(fp==NULL)

 {

        printf("此文件不存在!\n");

  }

     sum=0;

     ch=NULL;

      for(i=0;i<1000;i++)

       A[i].num=0;

   while(ch!=-1)

 {

     for(i=0;i<30;i++)

     s[i]='\0';

     ch=fgetc(fp);

    if((65<=ch&&ch<=90)||(ch>=97&&ch<=122))

  {

      for(i=0;;i++)

      {

         s[i]=ch;

         ch=fgetc(fp);

         if((65<=ch&&ch<=90)||(ch>=97&&ch<=122))continue;

         else break;

       }

       chuli(s);

      }

    }

       paixu();

       printf("该文章共有:%d个单词\n",sum);

       printf("该文章中单词出现频率最高的前十个从小到大依次为:\n");

        for(i=0;i<10;i++)

        printf("%s出现次数为:%d\n",A[i].str,A[i].num);

       return 0;

}