00001
00002
00003
00004
00005
00006
00007
00008 #include <ctype.h>
00009 #include <string.h>
00010
00011 #define llex_c
00012
00013 #include "lua.h"
00014
00015 #include "ldo.h"
00016 #include "llex.h"
00017 #include "lobject.h"
00018 #include "lparser.h"
00019 #include "lstate.h"
00020 #include "lstring.h"
00021 #include "lzio.h"
00022
00023
00024
/*
** Advance the lexer by one character: read the next character from the
** input stream `LS->z' with zgetc and store it in `LS->current'.  The
** expression yields the value just stored.
** The argument is parenthesized at each use so that any pointer-valued
** expression may be passed; note that it is evaluated twice.
*/
#define next(LS) ((LS)->current = zgetc((LS)->z))
00026
00027
00028
00029
00030
/*
** Printable spelling of every token handled through this table.
** luaX_token2str indexes it with `token - FIRST_RESERVED', and luaX_init
** registers its first NUM_RESERVED entries as reserved words, so the
** order of the entries is significant.
*/
static const char *const token2string [] = {
  /* words */
  "and", "break", "do", "else", "elseif", "end", "false", "for",
  "function", "if", "in", "local", "nil", "not", "or", "repeat",
  "return", "then", "true", "until", "while",
  /* identifiers */
  "*name",
  /* multi-character operators */
  "..", "...", "==", ">=", "<=", "~=",
  /* literals and end of stream */
  "*number", "*string", "<eof>"
};
00039
00040
00041 void luaX_init (lua_State *L) {
00042 int i;
00043 for (i=0; i<NUM_RESERVED; i++) {
00044 TString *ts = luaS_new(L, token2string[i]);
00045 luaS_fix(ts);
00046 lua_assert(strlen(token2string[i])+1 <= TOKEN_LEN);
00047 ts->tsv.reserved = cast(lu_byte, i+1);
00048 }
00049 }
00050
00051
/* size of the buffer that receives the chunk id shown in error messages */
#define MAXSRC 80
00053
00054
00055 void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) {
00056 if (val > limit) {
00057 msg = luaO_pushfstring(ls->L, "too many %s (limit=%d)", msg, limit);
00058 luaX_syntaxerror(ls, msg);
00059 }
00060 }
00061
00062
00063 void luaX_errorline (LexState *ls, const char *s, const char *token, int line) {
00064 lua_State *L = ls->L;
00065 char buff[MAXSRC];
00066 luaO_chunkid(buff, getstr(ls->source), MAXSRC);
00067 luaO_pushfstring(L, "%s:%d: %s near `%s'", buff, line, s, token);
00068 luaD_throw(L, LUA_ERRSYNTAX);
00069 }
00070
00071
00072 static void luaX_error (LexState *ls, const char *s, const char *token)
00073
00074 {
00075 luaX_errorline(ls, s, token, ls->linenumber);
00076 }
00077
00078
00079 void luaX_syntaxerror (LexState *ls, const char *msg) {
00080 const char *lasttoken;
00081 switch (ls->t.token) {
00082 case TK_NAME:
00083 lasttoken = getstr(ls->t.seminfo.ts);
00084 break;
00085 case TK_STRING:
00086 case TK_NUMBER:
00087 lasttoken = luaZ_buffer(ls->buff);
00088 break;
00089 default:
00090 lasttoken = luaX_token2str(ls, ls->t.token);
00091 break;
00092 }
00093 luaX_error(ls, msg, lasttoken);
00094 }
00095
00096
00097 const char *luaX_token2str (LexState *ls, int token) {
00098 if (token < FIRST_RESERVED) {
00099 lua_assert(token == (unsigned char)token);
00100 return luaO_pushfstring(ls->L, "%c", token);
00101 }
00102 else
00103 return token2string[token-FIRST_RESERVED];
00104 }
00105
00106
00107 static void luaX_lexerror (LexState *ls, const char *s, int token)
00108
00109 {
00110 if (token == TK_EOS)
00111 luaX_error(ls, s, luaX_token2str(ls, token));
00112 else
00113 luaX_error(ls, s, luaZ_buffer(ls->buff));
00114 }
00115
00116
00117 static void inclinenumber (LexState *LS)
00118
00119 {
00120 next(LS);
00121 ++LS->linenumber;
00122 luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk");
00123 }
00124
00125
00126 void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) {
00127 LS->L = L;
00128 LS->lookahead.token = TK_EOS;
00129 LS->z = z;
00130 LS->fs = NULL;
00131 LS->linenumber = 1;
00132 LS->lastline = 1;
00133 LS->source = source;
00134 next(LS);
00135 if (LS->current == '#') {
00136 do {
00137 next(LS);
00138 } while (LS->current != '\n' && LS->current != EOZ);
00139 }
00140 }
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
/* extra room requested whenever the token buffer has to be enlarged */
#define EXTRABUFF 32

/*
** slack guaranteed by checkbuffer: after a successful check, the buffer
** has room for at least this many characters beyond position `len'
*/
#define MAXNOCHECK 5

/*
** Ensure the token buffer of `LS' can hold `len' characters plus
** MAXNOCHECK more; when it cannot, ask luaZ_openspace for room for
** `len'+EXTRABUFF characters.
** Wrapped in do/while(0) so the macro is a single statement and cannot
** capture an `else' that follows it.
*/
#define checkbuffer(LS, len) \
  do { \
    if (((len)+MAXNOCHECK)*sizeof(char) > luaZ_sizebuffer((LS)->buff)) \
      luaZ_openspace((LS)->L, (LS)->buff, (len)+EXTRABUFF); \
  } while (0)

/*
** Store character `c' at position `l' of the token buffer and advance
** `l' (which must be a modifiable lvalue).  No bounds check is made here;
** callers rely on a preceding checkbuffer.
*/
#define save(LS, c, l) \
  (luaZ_buffer((LS)->buff)[(l)++] = cast(char, (c)))

/* store the current character, then read the next one */
#define save_and_next(LS, l)  (save(LS, (LS)->current, l), next(LS))
00166
00167
00168 static size_t readname (LexState *LS)
00169
00170 {
00171 size_t l = 0;
00172 checkbuffer(LS, l);
00173 do {
00174 checkbuffer(LS, l);
00175 save_and_next(LS, l);
00176 } while (isalnum(LS->current) || LS->current == '_');
00177 save(LS, '\0', l);
00178 return l-1;
00179 }
00180
00181
00182
00183 static void read_numeral (LexState *LS, int comma, SemInfo *seminfo)
00184
00185 {
00186 size_t l = 0;
00187 checkbuffer(LS, l);
00188 if (comma) save(LS, '.', l);
00189 while (isdigit(LS->current)) {
00190 checkbuffer(LS, l);
00191 save_and_next(LS, l);
00192 }
00193 if (LS->current == '.') {
00194 save_and_next(LS, l);
00195 if (LS->current == '.') {
00196 save_and_next(LS, l);
00197 save(LS, '\0', l);
00198 luaX_lexerror(LS,
00199 "ambiguous syntax (decimal point x string concatenation)",
00200 TK_NUMBER);
00201 }
00202 }
00203 while (isdigit(LS->current)) {
00204 checkbuffer(LS, l);
00205 save_and_next(LS, l);
00206 }
00207 if (LS->current == 'e' || LS->current == 'E') {
00208 save_and_next(LS, l);
00209 if (LS->current == '+' || LS->current == '-')
00210 save_and_next(LS, l);
00211 while (isdigit(LS->current)) {
00212 checkbuffer(LS, l);
00213 save_and_next(LS, l);
00214 }
00215 }
00216 save(LS, '\0', l);
00217 if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r))
00218 luaX_lexerror(LS, "malformed number", TK_NUMBER);
00219 }
00220
00221
00222 static void read_long_string (LexState *LS, SemInfo *seminfo)
00223
00224 {
00225 int cont = 0;
00226 size_t l = 0;
00227 checkbuffer(LS, l);
00228 save(LS, '[', l);
00229 save_and_next(LS, l);
00230 if (LS->current == '\n')
00231 inclinenumber(LS);
00232 for (;;) {
00233 checkbuffer(LS, l);
00234 switch (LS->current) {
00235 case EOZ:
00236 save(LS, '\0', l);
00237 luaX_lexerror(LS, (seminfo) ? "unfinished long string" :
00238 "unfinished long comment", TK_EOS);
00239 break;
00240 case '[':
00241 save_and_next(LS, l);
00242 if (LS->current == '[') {
00243 cont++;
00244 save_and_next(LS, l);
00245 }
00246 continue;
00247 case ']':
00248 save_and_next(LS, l);
00249 if (LS->current == ']') {
00250 if (cont == 0) goto endloop;
00251 cont--;
00252 save_and_next(LS, l);
00253 }
00254 continue;
00255 case '\n':
00256 save(LS, '\n', l);
00257 inclinenumber(LS);
00258 if (!seminfo) l = 0;
00259 continue;
00260 default:
00261 save_and_next(LS, l);
00262 }
00263 } endloop:
00264 save_and_next(LS, l);
00265 save(LS, '\0', l);
00266 if (seminfo)
00267 seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 2, l - 5);
00268 }
00269
00270
00271 static void read_string (LexState *LS, int del, SemInfo *seminfo)
00272
00273 {
00274 size_t l = 0;
00275 checkbuffer(LS, l);
00276 save_and_next(LS, l);
00277 while (LS->current != del) {
00278 checkbuffer(LS, l);
00279 switch (LS->current) {
00280 case EOZ:
00281 save(LS, '\0', l);
00282 luaX_lexerror(LS, "unfinished string", TK_EOS);
00283 break;
00284 case '\n':
00285 save(LS, '\0', l);
00286 luaX_lexerror(LS, "unfinished string", TK_STRING);
00287 break;
00288 case '\\':
00289 next(LS);
00290 switch (LS->current) {
00291 case 'a': save(LS, '\a', l); next(LS); break;
00292 case 'b': save(LS, '\b', l); next(LS); break;
00293 case 'f': save(LS, '\f', l); next(LS); break;
00294 case 'n': save(LS, '\n', l); next(LS); break;
00295 case 'r': save(LS, '\r', l); next(LS); break;
00296 case 't': save(LS, '\t', l); next(LS); break;
00297 case 'v': save(LS, '\v', l); next(LS); break;
00298 case '\n': save(LS, '\n', l); inclinenumber(LS); break;
00299 case EOZ: break;
00300 default: {
00301 if (!isdigit(LS->current))
00302 save_and_next(LS, l);
00303 else {
00304 int c = 0;
00305 int i = 0;
00306 do {
00307 c = 10*c + (LS->current-'0');
00308 next(LS);
00309 } while (++i<3 && isdigit(LS->current));
00310 if (c > UCHAR_MAX) {
00311 save(LS, '\0', l);
00312 luaX_lexerror(LS, "escape sequence too large", TK_STRING);
00313 }
00314 save(LS, c, l);
00315 }
00316 }
00317 }
00318 break;
00319 default:
00320 save_and_next(LS, l);
00321 }
00322 }
00323 save_and_next(LS, l);
00324 save(LS, '\0', l);
00325 seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 1, l - 3);
00326 }
00327
00328
00329 int luaX_lex (LexState *LS, SemInfo *seminfo) {
00330 for (;;) {
00331 switch (LS->current) {
00332
00333 case '\n': {
00334 inclinenumber(LS);
00335 continue;
00336 }
00337 case '-': {
00338 next(LS);
00339 if (LS->current != '-') return '-';
00340
00341 next(LS);
00342 if (LS->current == '[' && (next(LS), LS->current == '['))
00343 read_long_string(LS, NULL);
00344 else
00345 while (LS->current != '\n' && LS->current != EOZ)
00346 next(LS);
00347 continue;
00348 }
00349 case '[': {
00350 next(LS);
00351 if (LS->current != '[') return '[';
00352 else {
00353 read_long_string(LS, seminfo);
00354 return TK_STRING;
00355 }
00356 }
00357 case '=': {
00358 next(LS);
00359 if (LS->current != '=') return '=';
00360 else { next(LS); return TK_EQ; }
00361 }
00362 case '<': {
00363 next(LS);
00364 if (LS->current != '=') return '<';
00365 else { next(LS); return TK_LE; }
00366 }
00367 case '>': {
00368 next(LS);
00369 if (LS->current != '=') return '>';
00370 else { next(LS); return TK_GE; }
00371 }
00372 case '~': {
00373 next(LS);
00374 if (LS->current != '=') return '~';
00375 else { next(LS); return TK_NE; }
00376 }
00377 case '"':
00378 case '\'': {
00379 read_string(LS, LS->current, seminfo);
00380 return TK_STRING;
00381 }
00382 case '.': {
00383 next(LS);
00384 if (LS->current == '.') {
00385 next(LS);
00386 if (LS->current == '.') {
00387 next(LS);
00388 return TK_DOTS;
00389 }
00390 else return TK_CONCAT;
00391 }
00392 else if (!isdigit(LS->current)) return '.';
00393 else {
00394 read_numeral(LS, 1, seminfo);
00395 return TK_NUMBER;
00396 }
00397 }
00398 case EOZ: {
00399 return TK_EOS;
00400 }
00401 default: {
00402 if (isspace(LS->current)) {
00403 next(LS);
00404 continue;
00405 }
00406 else if (isdigit(LS->current)) {
00407 read_numeral(LS, 0, seminfo);
00408 return TK_NUMBER;
00409 }
00410 else if (isalpha(LS->current) || LS->current == '_') {
00411
00412 size_t l = readname(LS);
00413 TString *ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), l);
00414 if (ts->tsv.reserved > 0)
00415 return ts->tsv.reserved - 1 + FIRST_RESERVED;
00416 seminfo->ts = ts;
00417 return TK_NAME;
00418 }
00419 else {
00420 int c = LS->current;
00421 if (iscntrl(c))
00422 luaX_error(LS, "invalid control char",
00423 luaO_pushfstring(LS->L, "char(%d)", c));
00424 next(LS);
00425 return c;
00426 }
00427 }
00428 }
00429 }
00430 }
00431
00432 #undef next