在网上,看到还是有部分程序爱好者希望能编出自己的编译器.当然,这的确是件难事,许多人都说要去看什么编译原理和精通汇编语言,结果让这些爱好者都望而却步.但是,当我们亲手去做做后,发现要做一个简单的程序解释器(就像Java和Basic)那样,还是挺容易的.你根本不用去看那些东西,只要你懂C语言,在看了本文后,就可以完成那样的解释器.
在网上,有许多大型C语言,Perl语言的编译器源代码.但当你下载后看看,竟发现点都看不懂.其实那些东西还是不看为妙.看了本文后,我相信你宁愿自己动手编,也不愿意去领会那些庞大的源代码了.
少说费话了,我们开始讲解.
这一篇Basic解释器的代码.十分经典.而且十分简单化.
get_token()是词汇提取,譬如 PRINT A+B 通过调用一次get_token(),就在 字符串token里装上PRINT 再调用一次get_token(),token里就装上A 再调用一次get_token(),token里就装上+ 再调用一次get_token(),token里就装上B 很简单吧! putback()是将prog指针向回移动一格.
其中包含了词发分析和十分关键的代数式求值get_exp(int *result) 关于它的代数式求值get_exp(int *result),用到递归函数 void get_exp(),level2(),level3(),level4(),level5(); void level6(),primitive(),arith(),unary(); ,确实难看懂,不过你尽管拿来用就是了.
话不多说,你看源代码就是了.最后,我将给你看看C++中完整的源代码
/* recursive descent parser for integer expression which may include variables */
#include <stdio.h> #include <setjmp.h> #include <math.h> #include <ctype.h> #include <stdlib.h>
#define DELIMITER 1 #define VARIABLE 2 #define NUMBER 3 #define COMMAND 4 #define STRING 5 #define QUOTE 6
#define EOL 9 #define FINISHED 10
extern char *prog; /* holds expression to be analyzed */
extern jmp_buf e_buf; /* hold enviroment */ extern int variables[26]; /* variables */ extern struct commands { char command[20]; char tok; } table[];
extern char token[80]; /* holds string representation of token */ extern char token_type; /* contains type of token */ extern char tok; /* holds the internal representation of token */
void get_exp(),level2(),level3(),level4(),level5(); void level6(),primitive(),arith(),unary(); void serror(),putback();
/* entry point into parser */ void get_exp(int *result) { get_token(); if (!*token) { serror(2); return; } level2(result); putback(); /*return last token read to input stream */ }
/* add or subtract two terms */ void level2(int *result) { register char op; int hold; level3(result); while ((op = *token) =='+' || op == '-') { get_token(); level3(&hold); arith(op,result,&hold); } }
/* multiply or divide two factors */ void level3(int *result) { register char op; int hold;
level4(result); while ((op = *token) == '*' || op == '/' || op == '%') { get_token(); level3(&hold); arith(op,result,&hold); } }
/* process integer exponent */ void level4(int *result) { register char op; int hold;
level5(result); if (*token == '^') { get_token(); level4(&hold); arith(op,result,&hold); } }
/* is a unary + or - */ void level5(int *result) { register char op; op = 0; if ((token_type==DELIMITER) && *token == '+' || *token == '-' ) { op = *token; get_token(); } level6(result); if (op) unary(op,result); }
/* process parenthesized expression */ void level6(int *result) { if ((*token == '(') && (token_type == DELIMITER)) { get_token(); level2(result); if (*token!=')') serror(1); get_token(); } else primitive(result); }
/* find value of number or variable */ void primitive(int *result) { switch (token_type) { case VARIABLE: *result = find_var(token); get_token(); return; case NUMBER: *result = atoi(token); get_token(); return; default: serror(0); } }
/* perform the specified arithmetic */ void arith(char o,int *r,int *h) { register int t,ex; switch (o) { case '-': *r = *r-*h; break; case '+': *r = *r+*h; break; case '*': *r = *r**h; break; case '/': *r = (*r)/(*h); break; case '%': *r = (*r)%(*h); break; case '^': ex = *r; if (*h==0) { *r = 1; break; } for (t=*h-1;t>0;--t) *r=(*r)*ex; break; } }
/* reverse the sign */ void unary(char o,int *r) { if (o=='-') *r = -(*r); }
/* find the value of a variable */ int find_var(char *s) { if (!isalpha(*s)) { serror(4); /* not a variable */ return 0; } return variables[toupper(*token)-'A']; }
/* display an error message */ void serror(int error) { char *e[] = { "syntax error", "unbalanced parentheses", "no expression present", "equal sign expected", "not a variable", "label table full", "duplicate label", "undefined label", "THEN expected", "TO expected", "too many nested FOR loops", "NEXT without FOR", "too many nested GOSUB", "RETURN without GOSUB" };
printf ("%s\n",e[error]); longjmp(e_buf,1); /* return to save point */ }
/* get a token */ get_token() { register char *temp;
token_type = 0;tok = 0; temp = token; if (*prog == '\0') { /* end of file */ *token = 0; tok = FINISHED; return (token_type = DELIMITER); } while (iswhite(*prog)) ++prog; /* skip over white space */
if (*prog == '\r') { /* CR LF */ ++prog;++prog; tok = EOL;*token = '\r'; token[1] = '\n';token[2] = 0; return (token_type = DELIMITER); } if (strchr("+-*^/%=;(),><",*prog)) { /* delimiter */ *temp = *prog; prog++; /* advance to next position */ temp++; *temp=0; return (token_type = DELIMITER); } if (*prog == '"') { /* quote string */ prog++; while (*prog!='"'&&*prog!='\r') *temp++=*prog++; if (*prog=='\r') serror(1); prog++;*temp=0; return (token_type = QUOTE); } if (isdigit(*prog)) { /* number */ while (!isdelim(*prog)) *temp++=*prog++; *temp = '\0'; return (token_type = NUMBER); }
if (isalpha(*prog)) { /* var or command */ while (!isdelim(*prog)) *temp++=*prog++; token_type = STRING; }
*temp = '\0';
/* see if a string is a command or a variable */ if (token_type == STRING) { tok = look_up(token); /* convert to internal rep */ if (!tok) token_type = VARIABLE; else token_type = COMMAND; /* is a command */ } return token_type; }
/* return a token to input stream */ void putback() { char *t; t = token; for (;*t;t++) prog--; }
look_up(char *s) { register int i,j; char *p;
/* convert to lowercase */ p = s; while (*p) { *p = tolower(*p); p++; }
/* see if token is in table */ for (i=0;*table[i].command;i++) if (!strcmp(table[i].command,s)) return table[i].tok; return 0; /* unknown command */ }
/* return true if c is a delimiter */ isdelim(char c) { if (strchr(";,+-<>/*%^=() ",c)||c==9||c=='\r'||c==0) return 1; return 0; }
iswhite (char c) { if (c==' '||c=='\t') return 1; else return 0; }

|