编译原理实验1：词法分析程序设计与实现

请根据给定的文法设计并实现词法分析程序，从源程序中识别出单词，记录其单词类别和单词值，输入输出及处理要求如下：
（1）数据结构和与语法分析程序的接口请自行定义；类别码需按下表格式统一定义；
（2）为了方便进行自动评测，输入的被编译源文件统一命名为testfile.txt；输出的结果文件统一命名为output.txt，结果文件中每行按如下方式组织：
单词类别码单词的字符/字符串形式(中间仅用一个空格间隔)
单词的类别码请统一按如下形式定义：

【输入形式】testfile.txt中的符合文法要求的测试程序。
【输出形式】要求将词法分析结果输出至output.txt中。

实验过程以及结果

#include <string.h>
#include <iostream>
#include <fstream> 
#include <cstring>
#include <conio.h>
#include <map>
#include <algorithm>  
#include <ctype.h>
using namespace std;
map<string,string>Category_code={
    {"identifier","IDENFR"},    {"else","ELSETK"},        {"-","MINU"},    {"=","ASSIGN"}, 
    {"int_constant","INTCON"},  {"switch","SWITCHTK"},   {"*","MULT"},    {";","SEMICN"},
    {"char_constant","CHARCON"}, {"case","CASETK"},        {"/","DIV"},        {",","COMMA"},
    {"character_string","STRCON"},    {"default","DEFAULTTK"}, {"<","LSS"},        {"(","LPARENT"},
    {"const","CONSTTK"},    {"while",    "WHILETK"},        {"<=","LEQ"},    {")","RPARENT"},
    {"int",    "INTTK"},        {"for","FORTK"},            {">","GRE"},    {"[","LBRACK"},
    {"char","CHARTK"},         {"scanf","SCANFTK"},        {">=","GEQ"},    {"]","RBRACK"},
    {"void","VOIDTK"},     {"printf","PRINTFTK"},        {"==","EQL"},    {"{","LBRACE"},
    {"main","MAINTK"},     {"return","RETURNTK"},        {"!=","NEQ"},   {"}","RBRACE"},
    {"if","IFTK"},         {"+","PLUS"},               {":","COLON"}    
};
//函数声明
bool isnumber(char x);//判断整形常量
bool isalpha();//判断字母
string to_lower(string str);//大写转小写
string lexical_analysis(string filename);//词法分析
string Verification(string filename);//读入验证文件
void Verificate(int number);//输入需要验证的文件夹序号进行验证
int main()
{

    Verificate(6);
    system("pause");

    return 0;
}
//输入需要验证的文件夹序号进行验证
void Verificate(int number)
{
    string out="";
    for(int i=1;i<=10;i++)
    {
        string filename1=".vscode\\"+to_string(number)+"\\testfile"+to_string(i)+".txt";
        string filename2=".vscode\\"+to_string(number)+"\\output"+to_string(i)+".txt";
        string testout=lexical_analysis(filename1);
        string verificationout=Verification(filename2);
        out+=((verificationout==testout)?"right":"wrong");
        out+=" ";
    }
    cout<<out<<endl;
}

//读入验证文件,返回字符串
string Verification(string filename)
{

    string vout ="";
    fstream fin;
    fin.open(filename);
    if (!fin.is_open()) {
    cout << "Could not find the file\n";
    cout << "Program terminating\n";
    system("pause");
    exit(EXIT_FAILURE);
    }

    string tempout="";
    while (getline(fin,tempout))
    {
        vout+=tempout+" ";
    }
    return vout;
}
//词法分析,读入指定文件并进行词法分析,返回分析结果
string lexical_analysis(string filename)
{
    //词法分析输出结果
    string testout="";
    // 类别码初始化
    fstream fin;
    fin.open(filename);
        if (!fin.is_open()) {
        cout << "Could not find the file\n";
        cout << "Program terminating\n";
        system("pause");
        exit(EXIT_FAILURE);
    }
    string temp="";
    while(getline(fin,temp))
    {

        // 数组索引
        int index=0;
        //拼接字符串
        string single="";
        //当这一行读取结束并且拼接的字符串清空才能进行下一行操作
        while((index<temp.length())||single!="\0")
        {
            //如果一直读入数字或字母或_就拼接字符串
            if(isalnum(temp[index])||temp[index]=='_')
            {
                single+=temp[index];
                index++;
                
            }
            //没有读到数字或字母或者拼接符号说明拼接结束，去查找类别码
            else if(Category_code.find(to_lower(single))!=Category_code.end())
            {
                cout<<Category_code[single]<<" "<<single<<endl;
                testout+=Category_code[single]+" "+single+" ";
                single="";//清空拼接的字符串
            }
            //处理特殊情况
            else
            {   
                //拼接字符串长度>0,整形常量或者是标识符
                if(single.length()>0)
                {   
                    // 标识符不能用数字开头
                    if(!isnumber(single[0]))
                    {
                        cout<<Category_code["identifier"]<<" "<<single<<endl;
                        testout+=Category_code["identifier"]+" "+single+" ";
                        single="";
                    }
                    //整型常量
                    else
                    {
                        cout<<Category_code["int_constant"]<<" "<<single<<endl;
                        testout+=Category_code["int_constant"]+" "+single+" ";
                        single="";
                    }
                }
                else
                {
                    string curr(1,temp[index]);

                    // 如果是空格，\t 跳过,这里提供一种更简单的方法，当碰到ascii码小于等于32直接跳过
                    
                    // 方法一，枚举
                    // if(temp[index]=='\t'||temp[index]==' ')
                    // {
                    //     // do nothing
                    //     index++;                       
                    // }
                    
                    // 方法二，ascii码判断
                    if(temp[index]-'\0'<=32)
                    {
                        // do nothing
                        index++;       
                    }
                    //如果是单一特殊字符,去除<=,>=,==,!=
                    else if(Category_code.find(curr)!=Category_code.end()&&!((temp[index]=='<'||temp[index]=='>'||temp[index]=='!'||temp[index]=='=')&&temp[index+1]=='='))
                    {
                        cout<<Category_code[curr]<<" "<<temp[index]<<endl;
                        testout+=Category_code[curr]+" "+temp[index]+" ";
                        index++;
                    }
                    //如果是>=,<=,!=,==这种双字符类型
                    else if(((temp[index]=='<'||temp[index]=='>'||temp[index]=='!'||temp[index]=='=')&&temp[index+1]=='='))
                    {
                        curr+=temp[index+1];
                        cout<<Category_code[curr]<<" "<<temp[index]<<endl;
                        testout+=Category_code[curr]+" "+curr+" ";
                        index++;
                        index++;
                    }
                    //字符串常量
                    else if (temp[index]=='\"')
                    {
                        string str="";
                        while(temp[++index]!='\"')
                        {
                            str+=temp[index];
                        }
                        cout<<Category_code["character_string"]<<" "<<str<<endl;
                        testout+=Category_code["character_string"]+" "+str+" ";
                        index++;
                    }
                    //字符常量
                    else if (temp[index]=='\'')
                    {
                        string str="";
                        while(temp[++index]!='\'')
                        {
                            str+=temp[index];
                        }
                        cout<<Category_code["char_constant"]<<" "<<str<<endl;
                        testout+=Category_code["char_constant"]+" "+str+" ";
                        index++;
                    }
                    else 
                    {

                        cout<<"this line should not appear"<<endl;

                    }
                }             
            }
            
        }        
    }
    return testout;
}

bool isnumber(char x)//判断数字
{
    return x >= '0' && x <= '9';
}
bool isalpha(char x)//判断字母
{
    return (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z');
}
string to_lower(string str)//大写转小写
{   
    int i=0;
 
        while( str[i] ) 
        {
            tolower(str[i]);
            i++;
        }
    return str;
}

执行步骤如下：
最外面是一个大的while循环，判断文件是否读取完成，里面有一个小的while循环，判断读入的字符是否为空格，之后按照下面步骤进行。

k=0,如果读取的字符是字母（LETTER函数判断），则k++,继续读字符，并把读的字符传给数组TOKEN，直到读到的不是字母；最后读完字符串，还要与关键字做比较（RESERVEDWORD函数）如果字母后面是数字，则把它定义为标识符。
i=0,如果一开始就是数字（NUMBER函数判断），则k++，继续读数字，直到读不到数字。
如果是字符常量或者是字符串，前面字符是’或者”，在while循环里面，判断下面读取的字符是否为’或者”，如果是则跳出循环，在TOKEN数组里面的就是字符常量或者字符串。
最后是运算符判断，比如>=这个运算符，先读取的是>，然后再判断后面是否为=,如果为=，则定义为GEQ；否则为GRE，即=

https://blog.csdn.net/qq_44809707/article/details/111560922

编译原理词法分析实验题，原文：

编译原理词法分析实验_皮小孩ls的博客-CSDN博客_编译原理词法分析实验目录实验内容描述实验设计输入输出形式样例输入和样例输出实验设计原理（步骤）主要函数和辅助函数核心代码截图调试过程实验结果实验内容描述根据给定的文法设计并实现词法分析程序，从源程序中识别出单词，记录其单词类别和单词值，输入输出及处理要求如下：（1）数据结构和与语法分析程序的接口请自行定义；类别码需按下表格式统一定义；（2）为了方便进行自动评测，输入的被编译源文件统一命名为testfile.txt；输出的结果文件统一命名为output.txt，结果文件中每行按如下方式组织：单词类别码单词的字符/字符

https://blog.csdn.net/qq_44809707/article/details/111560922

这是一个字符解析的过程

原题：

https://blog.csdn.net/qq_44809707/article/details/111560922

参考我上传的资源，编译原理的实验，指导手册，和实验代码

编译原理 实验1：词法分析程序设计与实现

编译原理实验1：词法分析程序设计与实现