编译原理实验有点难,但是加上ai的辅助就会很简单,下面梳理一下代码流程。
全代码在github仓库中,链接:NeiFeiTiii/CompilerOriginTest at Version2.0,感谢star一下
一、项目结构
关键内容就是里面的那几个.c和.h文件。还有那三个txt文件,用于输入输出的存储。
二、代码流程
标准就是先进入词法、然后进行语法和语义的分析,最后生成四元式完成中间代码生成。下面是main.c文件:
#include "PraseWithRecursive.h"
void clearFile(const char *filename) {
FILE *file = fopen(filename, "w");
if (file != NULL) {
fclose(file);
}
}
int main() {
clearFile("Lex.txt");
PraseWithRecursive();
return 0;
}
这里就是清空一下要输出到的文件内容,然后进到递归下降分析的语法语义的执行中,但是这里不要在意,语法语义会调用词法的函数,来进行一个取得下一个二元式。
void scanner() {
token = getNextToken();
word.Class = token.type;
switch (word.Class) {
case ID:
case INT:
case REAL:
strcpy(word.Value.Val1, token.value);
break;
case PLUS:
case MINUS:
case MUL:
case DIV:
case LP:
case RP:
case EQ:
case IS:
case LT:
case GT:
strcpy(word.Value.Val4, token.value);
break;
case END:
printf("End of tokens");
return;
default:
ErrorPrint("Unknown token type");
exit(1);
}
}
本项目的关键在于getNextToken()这个函数,通过它将前后实验联系在一起。
最后结果将会保存在Lex和Output这两个文件里面。
三、代码细节
1.词法
词法的代码细节是最关键的,它关乎到后面两个实验的实验结果,它的输出牵一发而动全身,主要就是完成二三实验调用的获取下一个token的接口。
下面是长代码,需要滑动屏幕:
Token getNextToken() {
static FILE *fp = NULL;
static int initialized = 0;
static long file_offset = 0;
if (!initialized) {
fp = fopen("input.txt", "r");
if (fp == NULL) {
printf("Error: Cannot open source file\n");
exit(1);
}
initialized = 1;
}
fseek(fp, file_offset, SEEK_SET);
char ch;
int i, c;
TOKEN = (char *)malloc(MAX_TOKEN_LENGTH * sizeof(char));
if (TOKEN == NULL) {
fprintf(stderr, "Error: Memory allocation failed\n");
exit(1);
}
ch = fgetc(fp);
while (1) {
if (ch == EOF) {
Token token = {END, " ", -1, -1};
out(token);
free(TOKEN);
file_offset = ftell(fp);
return token;
}
fseek(fp, -1, SEEK_CUR);
ch = fgetc(fp);
if (ch == '#') {
while (ch != '\n') {
ch = fgetc(fp);
column_number++;
}
continue;
}
if (isalpha(ch)) { // ID
TOKEN[0] = ch;
column_number++;
i = 1;
ch = fgetc(fp);
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
}
else{
while (isalnum(ch)) {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
}
}
if (ch != EOF)
fseek(fp, -1, SEEK_CUR);
}
TOKEN[i] = '\0';
file_offset = ftell(fp);
c = lookup(TOKEN);
if (c == ID) {
Token token = {ID, "", line_number, column_number};
strcpy(token.value, TOKEN);
out(token);
free(TOKEN);
return token;
}
}
if (isdigit(ch)) {
TOKEN[0] = ch;
ch = fgetc(fp);
column_number++;
i = 1;
int is_real = 0;
int is_octal = (TOKEN[0] == '0');
int is_hex = 0;
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
} else {
if (is_octal && (ch == 'x' || ch == 'X')) { // 16进制
is_octal = 0;
is_hex = 1;
TOKEN[i] = ch;
i++;
column_number++;
ch = fgetc(fp);
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
} else {
while ((ch <= 70 && ch >= 65) || (ch <= 102 && ch >= 97) || isdigit(ch)) {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
}
if (ch != EOF)
fseek(fp, -1, SEEK_CUR);
}
}
if (is_octal && ch != 'x' && ch != 'X') { // 8进制
is_octal = 0;
while (isdigit(ch)) {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
is_octal = 1;
}
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
} else
fseek(fp, -1, SEEK_CUR);
}
while (isdigit(ch)) { // 10进制
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
}
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
} else {
if (ch == '.') {
TOKEN[i] = ch;
is_real = 1;
i++;
ch = fgetc(fp);
while (isdigit(ch)) {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
}
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
} else
fseek(fp, -1, SEEK_CUR);
}
if (ch == 'e' || ch == 'E') {
is_real = 1;
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
ErrorPrint("Real number not complete,In the end of file");
} else {
if (ch == '+' || ch == '-') {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
ErrorPrint("Real number not complete,In the end of file");
while (isdigit(ch)) {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
}
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
} else
fseek(fp, -1, SEEK_CUR);
}
while (isdigit(ch)) {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
}
if (ch == EOF) {
fseek(fp, 0, SEEK_END);
file_offset = ftell(fp);
} else
fseek(fp, -1, SEEK_CUR);
}
}
}
if (ch != EOF)
fseek(fp, -1, SEEK_CUR);
}
}
TOKEN[i] = '\0';
if (is_real) {
Token token = {REAL, "", line_number, column_number};
strcpy(token.value, TOKEN);
out(token);
free(TOKEN);
file_offset = ftell(fp);
return token;
} else if (is_hex) {
Token token = {HEX, "", line_number, column_number};
strcpy(token.value, TOKEN);
out(token);
free(TOKEN);
file_offset = ftell(fp);
return token;
} else if (is_octal) {
Token token = {OCTAL, "", line_number, column_number};
strcpy(token.value, TOKEN);
out(token);
free(TOKEN);
file_offset = ftell(fp);
return token;
} else {
Token token = {INT, "", line_number, column_number};
strcpy(token.value, TOKEN);
out(token);
free(TOKEN);
file_offset = ftell(fp);
return token;
}
}
if (ch == '"') {
ch = fgetc(fp);
column_number++;
i = 0;
while (ch != '"') {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
if (ch == EOF) {
Token token = {END, " ", -1, -1};
report_error("String not closed");
out(token);
free(TOKEN);
return token;
}
}
TOKEN[i] = '\0';
Token token = {STRING, "", line_number, column_number};
strcpy(token.value, TOKEN);
out(token);
free(TOKEN);
file_offset = ftell(fp);
return token;
}
if (ch == '\'') { // 字符
ch = fgetc(fp);
column_number++;
i = 0;
while (ch != '\'') {
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
column_number++;
}
TOKEN[i] = '\0';
Token token = {CHAR, "", line_number, column_number};
strcpy(token.value, TOKEN);
out(token);
free(TOKEN);
file_offset = ftell(fp);
return token;
}
if (ch == '(' || ch == ')' || ch == '{' || ch == '}' || ch == '[' || ch == ']' || ch == ';' || ch == ',') {
Token token;
if (ch == '(') token = (Token){LP, "(", line_number, column_number};
else if (ch == ')') token = (Token){RP, ")", line_number, column_number};
else if (ch == '{') token = (Token){BRACKET, "{", line_number, column_number};
else if (ch == '}') token = (Token){BRACKET, "}", line_number, column_number};
else if (ch == '[') token = (Token){BRACKET, "[", line_number, column_number};
else if (ch == ']') token = (Token){BRACKET, "]", line_number, column_number};
else if (ch == ';') token = (Token){DOT, ";", line_number, column_number};
else token = (Token){DOT, ",", line_number, column_number};
out(token);
column_number++;
free(TOKEN);
file_offset = ftell(fp);
return token;
}
if (ch == '=' || ch == '!' || ch == '<' || ch == '>' || ch == ':') {
char next_ch = fgetc(fp);
column_number++;
Token token;
if (ch == '=' && next_ch == '=') token = (Token){EQ, "", line_number, column_number};
else if (ch == '!' && next_ch == '=') token = (Token){NE, "", line_number, column_number};
else if (ch == '<' && next_ch == '=') token = (Token){LE, "", line_number, column_number};
else if (ch == '>' && next_ch == '=') token = (Token){GE, "", line_number, column_number};
else if (ch == ':' && next_ch == '=') token = (Token){IS, "", line_number, column_number};
else {
fseek(fp, -1, SEEK_CUR);
column_number--;
if (ch == '=') token = (Token){IS, " ", line_number, column_number};
else if (ch == '!') token = (Token){NOT, " ", line_number, column_number};
else if (ch == '<') token = (Token){LT, " ", line_number, column_number};
else if (ch == '>') token = (Token){GT, " ", line_number, column_number};
else {
report_error("Unknown operator");
out(token);
free(TOKEN);
exit(1);
}
}
out(token);
free(TOKEN);
file_offset = ftell(fp);
return token;
}
if (ch == '+' || ch == '-' || ch == '*' || ch == '/') {
Token token;
if (ch == '+') token = (Token){PLUS, " ", line_number, column_number};
else if (ch == '-') token = (Token){MINUS, " ", line_number, column_number};
else if (ch == '*') token = (Token){MUL, " ", line_number, column_number};
else token = (Token){DIV, " ", line_number, column_number};
out(token);
column_number++;
free(TOKEN);
file_offset = ftell(fp);
return token;
}
if (isspace(ch)) {
if (ch == '\n') {
line_number++;
column_number = 0;
} else {
column_number++;
}
ch = fgetc(fp);
continue;
}
else {
report_error("Unknown character");
Token token = {UNKNOWN, " ", line_number, column_number};
out(token);
free(TOKEN);
exit(1);
}
}
}
这里的逻辑就是获取文件中的一个字母,看看是什么类型(即if), 如果符合这个类型,就继续检查下一个字母(即while),直到这个字母不符合条件,记录检测文件的断点,返回这个token。思路清晰,代码逻辑也就不攻自破。
2.语法、语义
这里几乎是糅合在一块了,使用递归下降的方法,之后打印结果。
char *E(void) {
char opp[3], *E1_place, *E2_place, *Temp_place;
E1_place = T();
while (word.Class == PLUS || word.Class == MINUS || word.Class == EQ) {
if (word.Class == PLUS) {
strcpy(opp, "+");
} else if (word.Class == MINUS) {
strcpy(opp, "-");
} else if (word.Class == EQ) {
strcpy(opp, "==");
}
scanner();
E2_place = T();
Temp_place = NewTemp();
GEN(opp, E1_place, E2_place, Temp_place);
E1_place = Temp_place;
}
return E1_place;
}
char *T(void) {
char opp[3], *T1_place, *T2_place, *Temp_place;
T1_place = F();
while (word.Class == MUL || word.Class == DIV || word.Class == LT || word.Class == GT) {
if (word.Class == MUL) {
strcpy(opp, "*");
} else if (word.Class == DIV) {
strcpy(opp, "/");
} else if (word.Class == LT) {
strcpy(opp, "<");
} else if (word.Class == GT) {
strcpy(opp, ">");
}
scanner();
T2_place = F();
Temp_place = NewTemp();
GEN(opp, T1_place, T2_place, Temp_place);
T1_place = Temp_place;
}
return T1_place;
}
char *F(void) {
char *place;
if (word.Class == ID || word.Class == INT || word.Class == REAL) { // 标识符
place = strdup(token.value); // 自动分配内存,然后复制字符串
scanner();
return place;
} else if (word.Class == LP) { // 左括号
scanner();
place = E();
if (word.Class == RP) { // 右括号
scanner();
return place;
} else {
ErrorPrint("dont have')'");
}
} else {
ErrorPrint("Not A Valid Prase");
}
return NULL; // 无效返回,其实根本不会到这里,完全是做个警告处理
}
char *A(void) { // 赋值语句的处理
char *E_place, *A_place;
E_place = E();
if (word.Class == IS) {
scanner();
A_place = E();
GEN("=", E_place, "", A_place);
return A_place;
}
return E_place;
}