头文件head里是链表相关代码。构想的是使用文件流读txt文件里的简单c++文件,然后实现简单词法分析。在AutoForLexical()函数里设计如果ifflag的值为true,则说明状态转换时使用了当前fgetc()获取的一个字符,在循环结束时读取下一个字符。否则不在读取新的字符知道ifflag为true。但是我这里AutoForLexical()函数中的while循环只会识别出void这个词就停止了,也就是遇到第一个空格后ifflag变为false后就不会被重置。
#define _CRT_SECURE_NO_WARNINGS;
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include <string>
#include"head.h"
using namespace std;
char KEYWORD[] = "保留字";
char ID[] = "标识符";
char NUM[] = "常数";
char SEPARATOR[] = "分隔符号";
char ARITHMETIC[] = "算术运算符";
char RELATIONAL[] = "关系运算符";
char SPECIAL[] = "特殊符号";
char keyWord[200][200] = {"abstract","auto","break","case","char","class","const",
"continue","default","delete","double","enum","extern","f","F","far","final",
"float","for","friend","goto","if","inline","int","interrupt","l","L","long",
"new","operator","private","protected","public","register","return","short",
"signed","sizeof","static","struct","switch","typedef","u","U","union","unsigned",
"virtual","void","while","main","else","wchar_t"};
char entry[200] = "";
bool ifflag = true;
FILE* cstream, * outstream;
int sEnd[] = {2,3,4,6,7,8,11,12,14,15,17,18,20,21,23,24,26,27,29,30,32,33,35,36,37,45,47,48};
int currentNum = 0;
int numOfKeyWord = 0;
int numOfID = 0;
int numOfNum = 0;
int numOfSeparator = 0;
int numOfArithmetic = 0;
int numOfRelational = 0;
int numOfSpecial = 0;
ChainList keyWordLt;
ChainList keyWordList;
ChainList idList;
ChainList numList;
ChainList separatprList;
ChainList arithmeticList;
ChainList relationalList;
ChainList specialList;
int move(int s, char ch);
int getLength(char entry[]);
void AutoForLexical(FILE* cstream, FILE* outstream);
int findEnd(int s);
int main()
{
if ((cstream = fopen("d:\\sample.txt", "r")) == NULL)
{
printf("Failed to open sample.txt!");
exit(0);
}
if ((outstream = fopen("d:\\text1.txt", "w")) == NULL)
{
printf("Failed to open text1.txt");
exit(0);
}
while (strcmp("", keyWord[currentNum]) != 0)
{
keyWordLt.Insert(keyWord[currentNum], currentNum, KEYWORD);
currentNum++;
}
AutoForLexical(cstream, outstream);
ChainListNode* temp;
fprintf(outstream,"%s \n","标识符表");
temp = idList.firstNode->nextNode;
for (int i = 0; i <= idList.getListLength() - 1; i++)
{
//fprintf(outstream, "%d %s %s\n", temp->data.num,temp->data.name,temp->data.type);
printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "保留字");
temp = keyWordLt.firstNode->nextNode;
for (int i = 0; i <= keyWordLt.getListLength() - 1; i++)
{
//fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "常数表");
temp = numList.firstNode->nextNode;
for (int i = 0; i <= numList.getListLength() - 1; i++)
{
//fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "分隔符号表");
temp = separatprList.firstNode->nextNode;
for (int i = 0; i <= separatprList.getListLength() - 1; i++)
{
//fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "算术运算符表");
temp = arithmeticList.firstNode->nextNode;
for (int i = 0; i <= arithmeticList.getListLength() - 1; i++)
{
//fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "关系运算符表");
temp = relationalList.firstNode->nextNode;
for (int i = 0; i <= relationalList.getListLength() - 1; i++)
{
//fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "特殊符号表");
temp = specialList.firstNode->nextNode;
for (int i = 0; i <= specialList.getListLength() - 1; i++)
{
//fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fclose(cstream);
fclose(outstream);
system("pause");
}
void AutoForLexical(FILE* cstream, FILE* outstream)
{
char ch;
int s0 = 0;
int s = s0;
memset(entry, '\0', sizeof(entry));
ch = fgetc(cstream);
while (ch!=EOF)
{
ifflag = true;
s = move(s, ch);
if (findEnd(s) != -1)
{
if (s >= 2 && s <= 18)//算术运算符
{
ChainListNode* temp = arithmeticList.Search(entry);
if (temp==nullptr)
{
arithmeticList.Insert(entry, numOfArithmetic, ARITHMETIC);
numOfArithmetic++;
}
else
{
fprintf(outstream,"%d %s %s\n",temp->data.num,temp->data.name,temp->data.type);
}
}
else if (s==48)//特殊字符
{
ChainListNode* temp = specialList.Search(entry);
if (temp == nullptr)
{
specialList.Insert(entry, numOfSpecial, SPECIAL);
numOfSpecial++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else if (s==37)//分隔符号
{
if ((int)ch == 126 || (int)ch == 94)
{
ChainListNode* temp = arithmeticList.Search(entry);
if (temp == nullptr)
{
arithmeticList.Insert(entry, numOfArithmetic, ARITHMETIC);
numOfArithmetic++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else if ((int)ch == 34 || (int)ch == 39)
{
ChainListNode* temp = specialList.Search(entry);
if (temp == nullptr)
{
specialList.Insert(entry, numOfSpecial, SPECIAL);
numOfSpecial++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else
{
ChainListNode* temp = separatprList.Search(entry);
if (temp == nullptr)
{
separatprList.Insert(entry, numOfSeparator, SEPARATOR);
numOfSeparator++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
}
else if (s == 30)
{
ChainListNode* temp = separatprList.Search(entry);
if (temp == nullptr)
{
separatprList.Insert(entry, numOfSeparator, SEPARATOR);
numOfSeparator++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else if (s>=20 && s<=36 && s!=30)//关系运算符
{
ChainListNode* temp = relationalList.Search(entry);
if (temp == nullptr)
{
relationalList.Insert(entry, numOfRelational, RELATIONAL);
numOfRelational++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else if (s==47)//标识符和保留字
{
if (keyWordLt.Search(entry))
{
ChainListNode* temp = keyWordList.Search(entry);
if (temp == nullptr)
{
keyWordList.Insert(entry, numOfKeyWord, KEYWORD);
numOfKeyWord++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else
{
ChainListNode* temp = idList.Search(entry);
if (temp == nullptr)
{
idList.Insert(entry, numOfID, ID);
numOfID++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
}
else if (s==45)//常数
{
ChainListNode* temp = numList.Search(entry);
if (temp == nullptr)
{
numList.Insert(entry, numOfNum, NUM);
numOfNum++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
memset(entry,'\0',sizeof(entry));
s = s0;
}
if (ifflag)
{
ch = fgetc(cstream);
}
else {
continue;
}
}
}
int move(int s, char ch)
{
if ((int)ch == 43 && s == 0)
{
entry[getLength(entry)] = ch;
return 1;
}
else if((int)ch == 61 && s == 1)
{
entry[getLength(entry)] = ch;
return 2;
}
else if ((int)ch == 43 && s == 1)
{
entry[getLength(entry)] = ch;
return 3;
}
else if (s == 1 && !((int)ch==61 || (int)ch==43))
{
ifflag = false;
return 4;
}
else if ((int)ch == 38 && s == 0)
{
entry[getLength(entry)] = ch;
return 5;
}
else if ((int)ch == 61 && s == 5)
{
entry[getLength(entry)] = ch;
return 6;
}
else if ((int)ch == 62 && s == 5)
{
entry[getLength(entry)] = ch;
return 7;
}
else if ((int)ch == 38 && s == 5)
{
entry[getLength(entry)] = ch;
return 8;
}
else if (s == 5 && !((int)ch == 61 || (int)ch == 62 || (int)ch==38))
{
ifflag = false;
return 9;
}
else if ((int)ch == 42 && s == 0)
{
entry[getLength(entry)] = ch;
return 10;
}
else if ((int)ch == 61 && s == 10)
{
entry[getLength(entry)] = ch;
return 11;
}
else if (s == 10 && (int)ch != 61)
{
ifflag = false;
return 12;
}
else if ((int)ch == 40 && s == 0)
{
entry[getLength(entry)] = ch;
return 13;
}
else if ((int)ch == 61 && s == 13)
{
entry[getLength(entry)] = ch;
return 14;
}
else if (s == 13 && (int)ch != 61)
{
ifflag = false;
return 15;
}
else if ((int)ch == 37 && s == 0)
{
entry[getLength(entry)] = ch;
return 16;
}
else if ((int)ch == 61 && s == 16)
{
entry[getLength(entry)] = ch;
return 17;
}
else if (s == 16 && (int)ch != 61)
{
ifflag = false;
return 18;
}
else if ((int)ch == 33 && s == 0)
{
entry[getLength(entry)] = ch;
return 19;
}
else if ((int)ch == 61 && s == 19)
{
entry[getLength(entry)] = ch;
return 20;
}
else if (s == 19 && (int)ch != 61)
{
ifflag = false;
return 21;
}
else if ((int)ch == 38 && s == 0)
{
entry[getLength(entry)] = ch;
return 22;
}
else if ((int)ch == 38 && s == 22)
{
entry[getLength(entry)] = ch;
return 23;
}
else if (s == 22 && (int)ch != 38)
{
ifflag = false;
return 24;
}
else if ((int)ch == 124 && s == 0)
{
entry[getLength(entry)] = ch;
return 25;
}
else if ((int)ch == 124 && s == 25)
{
entry[getLength(entry)] = ch;
return 26;
}
else if (s == 25 && (int)ch != 124)
{
ifflag = false;
return 27;
}
else if ((int)ch == 61 && s == 0)
{
entry[getLength(entry)] = ch;
return 28;
}
else if ((int)ch == 61 && s == 28)
{
entry[getLength(entry)] = ch;
return 29;
}
else if (s == 28 && (int)ch != 61)
{
ifflag = false;
return 30;
}
else if ((int)ch == 62 && s == 0)
{
entry[getLength(entry)] = ch;
return 31;
}
else if ((int)ch == 61 && s == 31)
{
entry[getLength(entry)] = ch;
return 32;
}
else if (s == 31 && (int)ch != 61)
{
ifflag = false;
return 33;
}
else if ((int)ch == 60 && s == 0)
{
entry[getLength(entry)] = ch;
return 34;
}
else if ((int)ch == 61 && s == 34)
{
entry[getLength(entry)] = ch;
return 35;
}
else if (s == 34 && (int)ch != 61)
{
ifflag = false;
return 36;
}
else if ((int)ch==92 || (int)ch == 34 || (int)ch == 39 || (int)ch == 40 || (int)ch == 41 || (int)ch == 44 || (int)ch == 39 || (int)ch == 58 || (int)ch == 91 || (int)ch == 93 || (int)ch == 94 || (int)ch == 123 || (int)ch == 125 || (int)ch == 126)
{
entry[getLength(entry)] = ch;
return 37;
}
else if ((int)ch == 92 && s == 0)
{
entry[getLength(entry)] = ch;
return 38;
}
else if ((int)ch == 41 && s == 38)
{
entry[getLength(entry)] = ch;
return 39;
}
else if ((int)ch == 97 && s == 38)
{
entry[getLength(entry)] = ch;
return 40;
}
else if ((int)ch == 98 && s == 38)
{
entry[getLength(entry)] = ch;
return 41;
}
else if ((int)ch == 110 && s == 38)
{
entry[getLength(entry)] = ch;
return 42;
}
else if ((int)ch == 114 && s == 38)
{
entry[getLength(entry)] = ch;
return 43;
}
else if ((int)ch == 116 && s == 38)
{
entry[getLength(entry)] = ch;
return 44;
}
else if (s == 38)
{
ifflag = false;
return 38;
}
else if (((int)ch == 43 || (int)ch == 38) && s == 0)
{
entry[getLength(entry)] = ch;
return 38;
}
else if (!((int)ch >= 41 && (int)ch <= 57) && s == 38)
{
entry[getLength(entry) - 1] = '\0';
ifflag = false;
return 0;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 38)
{
entry[getLength(entry)] = ch;
return 39;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 39)
{
entry[getLength(entry)] = ch;
return 39;
}
else if ((int)ch == 39 && s == 39)
{
entry[getLength(entry)] = ch;
return 40;
}
else if ((int)ch == 101 && s == 39)
{
entry[getLength(entry)] = ch;
return 42;
}
else if (s == 39)
{
entry[getLength(entry)] = ch;
return 45;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 40)
{
entry[getLength(entry)] = ch;
return 41;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 41)
{
entry[getLength(entry)] = ch;
return 41;
}
else if ((int)ch == 101 && s == 41)
{
entry[getLength(entry)] = ch;
return 42;
}
else if (s == 41)
{
ifflag = false;
return 45;
}
else if (((int)ch == 43 || (int)ch == 38) && s == 42)
{
entry[getLength(entry)] = ch;
return 43;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 42)
{
entry[getLength(entry)] = ch;
return 44;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 43)
{
entry[getLength(entry)] = ch;
return 44;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 44)
{
entry[getLength(entry)] = ch;
return 44;
}
else if (s == 44)
{
ifflag = false;
return 45;
}
else if (((int)ch == 95 || ((int)ch >= 65 && (int)ch <= 90) || ((int)ch >= 97 && (int)ch <= 122)) && s == 0)
{
entry[getLength(entry)] = ch;
return 46;
}
else if (s == 0)
{
ifflag = false;
return 0;
}
else if (((int)ch == 95 || ((int)ch >= 65 && (int)ch <= 90) || ((int)ch >= 97 && (int)ch <= 122) || ((int)ch >= 41 && (int)ch <= 57)) && s == 46)
{
entry[getLength(entry)] = ch;
return 46;
}
else if (s == 46)
{
ifflag = false;
return 47;
}
else if (s == 0 && (int)ch == 0 || (int)ch == 7 || (int)ch == 8 || (int)ch == 9 || (int)ch == 10 || (int)ch == 11 || (int)ch == 12 || (int)ch == 13 || (int)ch == 34 || (int)ch == 39 || (int)ch == 63 || (int)ch == 92)
{
entry[getLength(entry)] = ch;
return 48;
}
}
int findEnd(int s)
{
int low = 0;
int high = 27;
while (low <= high)
{
int mid = (low + high) / 2;
if (s == sEnd[mid])
return sEnd[mid];
else
if (s > sEnd[mid])
low = mid + 1;
else
high = mid - 1;
}
return -1;
}
int getLength(char ch[])
{
int length = 0;
while (ch[length] != '\0')
length++;
return length;
}