请根据给定的文法设计并实现词法分析程序,从源程序中识别出单词,记录其单词类别和单词值,输入输出及处理要求如下:
(1)数据结构和与语法分析程序的接口请自行定义;类别码需按下表格式统一定义;
(2)为了方便进行自动评测,输入的被编译源文件统一命名为testfile.txt;输出的结果文件统一命名为output.txt,结果文件中每行按如下方式组织:
单词类别码 单词的字符/字符串形式(中间仅用一个空格间隔)
单词的类别码请统一按如下形式定义:
实验过程以及结果
#include <string.h>
#include <iostream>
#include <fstream>
#include <cstring>
#include <conio.h>
#include <map>
#include <algorithm>
#include <ctype.h>
using namespace std;
map<string,string>Category_code={
{"identifier","IDENFR"}, {"else","ELSETK"}, {"-","MINU"}, {"=","ASSIGN"},
{"int_constant","INTCON"}, {"switch","SWITCHTK"}, {"*","MULT"}, {";","SEMICN"},
{"char_constant","CHARCON"}, {"case","CASETK"}, {"/","DIV"}, {",","COMMA"},
{"character_string","STRCON"}, {"default","DEFAULTTK"}, {"<","LSS"}, {"(","LPARENT"},
{"const","CONSTTK"}, {"while", "WHILETK"}, {"<=","LEQ"}, {")","RPARENT"},
{"int", "INTTK"}, {"for","FORTK"}, {">","GRE"}, {"[","LBRACK"},
{"char","CHARTK"}, {"scanf","SCANFTK"}, {">=","GEQ"}, {"]","RBRACK"},
{"void","VOIDTK"}, {"printf","PRINTFTK"}, {"==","EQL"}, {"{","LBRACE"},
{"main","MAINTK"}, {"return","RETURNTK"}, {"!=","NEQ"}, {"}","RBRACE"},
{"if","IFTK"}, {"+","PLUS"}, {":","COLON"}
};
//函数声明
bool isnumber(char x);//判断整形常量
bool isalpha();//判断字母
string to_lower(string str);//大写转小写
string lexical_analysis(string filename);//词法分析
string Verification(string filename);//读入验证文件
void Verificate(int number);//输入需要验证的文件夹序号进行验证
int main()
{
Verificate(6);
system("pause");
return 0;
}
//输入需要验证的文件夹序号进行验证
void Verificate(int number)
{
string out="";
for(int i=1;i<=10;i++)
{
string filename1=".vscode\\"+to_string(number)+"\\testfile"+to_string(i)+".txt";
string filename2=".vscode\\"+to_string(number)+"\\output"+to_string(i)+".txt";
string testout=lexical_analysis(filename1);
string verificationout=Verification(filename2);
out+=((verificationout==testout)?"right":"wrong");
out+=" ";
}
cout<<out<<endl;
}
//读入验证文件,返回字符串
string Verification(string filename)
{
string vout ="";
fstream fin;
fin.open(filename);
if (!fin.is_open()) {
cout << "Could not find the file\n";
cout << "Program terminating\n";
system("pause");
exit(EXIT_FAILURE);
}
string tempout="";
while (getline(fin,tempout))
{
vout+=tempout+" ";
}
return vout;
}
//词法分析,读入指定文件并进行词法分析,返回分析结果
string lexical_analysis(string filename)
{
//词法分析输出结果
string testout="";
// 类别码初始化
fstream fin;
fin.open(filename);
if (!fin.is_open()) {
cout << "Could not find the file\n";
cout << "Program terminating\n";
system("pause");
exit(EXIT_FAILURE);
}
string temp="";
while(getline(fin,temp))
{
// 数组索引
int index=0;
//拼接字符串
string single="";
//当这一行读取结束并且拼接的字符串清空才能进行下一行操作
while((index<temp.length())||single!="\0")
{
//如果一直读入数字或字母或_就拼接字符串
if(isalnum(temp[index])||temp[index]=='_')
{
single+=temp[index];
index++;
}
//没有读到数字或字母或者拼接符号说明拼接结束,去查找类别码
else if(Category_code.find(to_lower(single))!=Category_code.end())
{
cout<<Category_code[single]<<" "<<single<<endl;
testout+=Category_code[single]+" "+single+" ";
single="";//清空拼接的字符串
}
//处理特殊情况
else
{
//拼接字符串长度>0,整形常量或者是标识符
if(single.length()>0)
{
// 标识符不能用数字开头
if(!isnumber(single[0]))
{
cout<<Category_code["identifier"]<<" "<<single<<endl;
testout+=Category_code["identifier"]+" "+single+" ";
single="";
}
//整型常量
else
{
cout<<Category_code["int_constant"]<<" "<<single<<endl;
testout+=Category_code["int_constant"]+" "+single+" ";
single="";
}
}
else
{
string curr(1,temp[index]);
// 如果是空格,\t 跳过,这里提供一种更简单的方法,当碰到ascii码小于等于32直接跳过
// 方法一,枚举
// if(temp[index]=='\t'||temp[index]==' ')
// {
// // do nothing
// index++;
// }
// 方法二,ascii码判断
if(temp[index]-'\0'<=32)
{
// do nothing
index++;
}
//如果是单一特殊字符,去除<=,>=,==,!=
else if(Category_code.find(curr)!=Category_code.end()&&!((temp[index]=='<'||temp[index]=='>'||temp[index]=='!'||temp[index]=='=')&&temp[index+1]=='='))
{
cout<<Category_code[curr]<<" "<<temp[index]<<endl;
testout+=Category_code[curr]+" "+temp[index]+" ";
index++;
}
//如果是>=,<=,!=,==这种双字符类型
else if(((temp[index]=='<'||temp[index]=='>'||temp[index]=='!'||temp[index]=='=')&&temp[index+1]=='='))
{
curr+=temp[index+1];
cout<<Category_code[curr]<<" "<<temp[index]<<endl;
testout+=Category_code[curr]+" "+curr+" ";
index++;
index++;
}
//字符串常量
else if (temp[index]=='\"')
{
string str="";
while(temp[++index]!='\"')
{
str+=temp[index];
}
cout<<Category_code["character_string"]<<" "<<str<<endl;
testout+=Category_code["character_string"]+" "+str+" ";
index++;
}
//字符常量
else if (temp[index]=='\'')
{
string str="";
while(temp[++index]!='\'')
{
str+=temp[index];
}
cout<<Category_code["char_constant"]<<" "<<str<<endl;
testout+=Category_code["char_constant"]+" "+str+" ";
index++;
}
else
{
cout<<"this line should not appear"<<endl;
}
}
}
}
}
return testout;
}
bool isnumber(char x)//判断数字
{
return x >= '0' && x <= '9';
}
bool isalpha(char x)//判断字母
{
return (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z');
}
string to_lower(string str)//大写转小写
{
int i=0;
while( str[i] )
{
tolower(str[i]);
i++;
}
return str;
}
执行步骤如下:
最外面是一个大的while循环,判断文件是否读取完成,里面有一个小的while循环,判断读入的字符是否为空格,之后按照下面步骤进行。
k=0,如果读取的字符是字母(LETTER函数判断),则k++,继续读字符,并把读的字符传给数组TOKEN,直到读到的不是字母;最后读完字符串,还要与关键字做比较(RESERVEDWORD函数)如果字母后面是数字,则把它定义为标识符。
i=0,如果一开始就是数字(NUMBER函数判断),则k++,继续读数字,直到读不到数字。
如果是字符常量或者是字符串,前面字符是’或者”,在while循环里面,判断下面读取的字符是否为’或者”,如果是则跳出循环,在TOKEN数组里面的就是字符常量或者字符串。
最后是运算符判断,比如>=这个运算符,先读取的是>,然后再判断后面是否为=,如果为=,则定义为GEQ;否则为GRE,即=
https://blog.csdn.net/qq_44809707/article/details/111560922
这是一个字符解析的过程
参考我上传的资源,编译原理的实验,指导手册,和实验代码