1*8e3e3a7aSWarner Losh /* 2*8e3e3a7aSWarner Losh ** $Id: llex.c,v 2.96 2016/05/02 14:02:12 roberto Exp $ 3*8e3e3a7aSWarner Losh ** Lexical Analyzer 4*8e3e3a7aSWarner Losh ** See Copyright Notice in lua.h 5*8e3e3a7aSWarner Losh */ 6*8e3e3a7aSWarner Losh 7*8e3e3a7aSWarner Losh #define llex_c 8*8e3e3a7aSWarner Losh #define LUA_CORE 9*8e3e3a7aSWarner Losh 10*8e3e3a7aSWarner Losh #include "lprefix.h" 11*8e3e3a7aSWarner Losh 12*8e3e3a7aSWarner Losh 13*8e3e3a7aSWarner Losh #include <locale.h> 14*8e3e3a7aSWarner Losh #include <string.h> 15*8e3e3a7aSWarner Losh 16*8e3e3a7aSWarner Losh #include "lua.h" 17*8e3e3a7aSWarner Losh 18*8e3e3a7aSWarner Losh #include "lctype.h" 19*8e3e3a7aSWarner Losh #include "ldebug.h" 20*8e3e3a7aSWarner Losh #include "ldo.h" 21*8e3e3a7aSWarner Losh #include "lgc.h" 22*8e3e3a7aSWarner Losh #include "llex.h" 23*8e3e3a7aSWarner Losh #include "lobject.h" 24*8e3e3a7aSWarner Losh #include "lparser.h" 25*8e3e3a7aSWarner Losh #include "lstate.h" 26*8e3e3a7aSWarner Losh #include "lstring.h" 27*8e3e3a7aSWarner Losh #include "ltable.h" 28*8e3e3a7aSWarner Losh #include "lzio.h" 29*8e3e3a7aSWarner Losh 30*8e3e3a7aSWarner Losh 31*8e3e3a7aSWarner Losh 32*8e3e3a7aSWarner Losh #define next(ls) (ls->current = zgetc(ls->z)) 33*8e3e3a7aSWarner Losh 34*8e3e3a7aSWarner Losh 35*8e3e3a7aSWarner Losh 36*8e3e3a7aSWarner Losh #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') 37*8e3e3a7aSWarner Losh 38*8e3e3a7aSWarner Losh 39*8e3e3a7aSWarner Losh /* ORDER RESERVED */ 40*8e3e3a7aSWarner Losh static const char *const luaX_tokens [] = { 41*8e3e3a7aSWarner Losh "and", "break", "do", "else", "elseif", 42*8e3e3a7aSWarner Losh "end", "false", "for", "function", "goto", "if", 43*8e3e3a7aSWarner Losh "in", "local", "nil", "not", "or", "repeat", 44*8e3e3a7aSWarner Losh "return", "then", "true", "until", "while", 45*8e3e3a7aSWarner Losh "//", "..", "...", "==", ">=", "<=", "~=", 46*8e3e3a7aSWarner Losh "<<", ">>", "::", "<eof>", 47*8e3e3a7aSWarner Losh "<number>", "<integer>", "<name>", "<string>" 48*8e3e3a7aSWarner Losh }; 49*8e3e3a7aSWarner Losh 50*8e3e3a7aSWarner Losh 51*8e3e3a7aSWarner Losh #define save_and_next(ls) (save(ls, ls->current), next(ls)) 52*8e3e3a7aSWarner Losh 53*8e3e3a7aSWarner Losh 54*8e3e3a7aSWarner Losh static l_noret lexerror (LexState *ls, const char *msg, int token); 55*8e3e3a7aSWarner Losh 56*8e3e3a7aSWarner Losh 57*8e3e3a7aSWarner Losh static void save (LexState *ls, int c) { 58*8e3e3a7aSWarner Losh Mbuffer *b = ls->buff; 59*8e3e3a7aSWarner Losh if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) { 60*8e3e3a7aSWarner Losh size_t newsize; 61*8e3e3a7aSWarner Losh if (luaZ_sizebuffer(b) >= MAX_SIZE/2) 62*8e3e3a7aSWarner Losh lexerror(ls, "lexical element too long", 0); 63*8e3e3a7aSWarner Losh newsize = luaZ_sizebuffer(b) * 2; 64*8e3e3a7aSWarner Losh luaZ_resizebuffer(ls->L, b, newsize); 65*8e3e3a7aSWarner Losh } 66*8e3e3a7aSWarner Losh b->buffer[luaZ_bufflen(b)++] = cast(char, c); 67*8e3e3a7aSWarner Losh } 68*8e3e3a7aSWarner Losh 69*8e3e3a7aSWarner Losh 70*8e3e3a7aSWarner Losh void luaX_init (lua_State *L) { 71*8e3e3a7aSWarner Losh int i; 72*8e3e3a7aSWarner Losh TString *e = luaS_newliteral(L, LUA_ENV); /* create env name */ 73*8e3e3a7aSWarner Losh luaC_fix(L, obj2gco(e)); /* never collect this name */ 74*8e3e3a7aSWarner Losh for (i=0; i<NUM_RESERVED; i++) { 75*8e3e3a7aSWarner Losh TString *ts = luaS_new(L, luaX_tokens[i]); 76*8e3e3a7aSWarner Losh luaC_fix(L, obj2gco(ts)); /* reserved words are never collected */ 77*8e3e3a7aSWarner Losh ts->extra = cast_byte(i+1); /* reserved word */ 78*8e3e3a7aSWarner Losh } 79*8e3e3a7aSWarner Losh } 80*8e3e3a7aSWarner Losh 81*8e3e3a7aSWarner Losh 82*8e3e3a7aSWarner Losh const char *luaX_token2str (LexState *ls, int token) { 83*8e3e3a7aSWarner Losh if (token < FIRST_RESERVED) { /* single-byte symbols? */ 84*8e3e3a7aSWarner Losh lua_assert(token == cast_uchar(token)); 85*8e3e3a7aSWarner Losh return luaO_pushfstring(ls->L, "'%c'", token); 86*8e3e3a7aSWarner Losh } 87*8e3e3a7aSWarner Losh else { 88*8e3e3a7aSWarner Losh const char *s = luaX_tokens[token - FIRST_RESERVED]; 89*8e3e3a7aSWarner Losh if (token < TK_EOS) /* fixed format (symbols and reserved words)? */ 90*8e3e3a7aSWarner Losh return luaO_pushfstring(ls->L, "'%s'", s); 91*8e3e3a7aSWarner Losh else /* names, strings, and numerals */ 92*8e3e3a7aSWarner Losh return s; 93*8e3e3a7aSWarner Losh } 94*8e3e3a7aSWarner Losh } 95*8e3e3a7aSWarner Losh 96*8e3e3a7aSWarner Losh 97*8e3e3a7aSWarner Losh static const char *txtToken (LexState *ls, int token) { 98*8e3e3a7aSWarner Losh switch (token) { 99*8e3e3a7aSWarner Losh case TK_NAME: case TK_STRING: 100*8e3e3a7aSWarner Losh case TK_FLT: case TK_INT: 101*8e3e3a7aSWarner Losh save(ls, '\0'); 102*8e3e3a7aSWarner Losh return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff)); 103*8e3e3a7aSWarner Losh default: 104*8e3e3a7aSWarner Losh return luaX_token2str(ls, token); 105*8e3e3a7aSWarner Losh } 106*8e3e3a7aSWarner Losh } 107*8e3e3a7aSWarner Losh 108*8e3e3a7aSWarner Losh 109*8e3e3a7aSWarner Losh static l_noret lexerror (LexState *ls, const char *msg, int token) { 110*8e3e3a7aSWarner Losh msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber); 111*8e3e3a7aSWarner Losh if (token) 112*8e3e3a7aSWarner Losh luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token)); 113*8e3e3a7aSWarner Losh luaD_throw(ls->L, LUA_ERRSYNTAX); 114*8e3e3a7aSWarner Losh } 115*8e3e3a7aSWarner Losh 116*8e3e3a7aSWarner Losh 117*8e3e3a7aSWarner Losh l_noret luaX_syntaxerror (LexState *ls, const char *msg) { 118*8e3e3a7aSWarner Losh lexerror(ls, msg, ls->t.token); 119*8e3e3a7aSWarner Losh } 120*8e3e3a7aSWarner Losh 121*8e3e3a7aSWarner Losh 122*8e3e3a7aSWarner Losh /* 123*8e3e3a7aSWarner Losh ** creates a new string and anchors it in scanner's table so that 124*8e3e3a7aSWarner Losh ** it will not be collected until the end of the compilation 125*8e3e3a7aSWarner Losh ** (by that time it should be anchored somewhere) 126*8e3e3a7aSWarner Losh */ 127*8e3e3a7aSWarner Losh TString *luaX_newstring (LexState *ls, const char *str, size_t l) { 128*8e3e3a7aSWarner Losh lua_State *L = ls->L; 129*8e3e3a7aSWarner Losh TValue *o; /* entry for 'str' */ 130*8e3e3a7aSWarner Losh TString *ts = luaS_newlstr(L, str, l); /* create new string */ 131*8e3e3a7aSWarner Losh setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */ 132*8e3e3a7aSWarner Losh o = luaH_set(L, ls->h, L->top - 1); 133*8e3e3a7aSWarner Losh if (ttisnil(o)) { /* not in use yet? */ 134*8e3e3a7aSWarner Losh /* boolean value does not need GC barrier; 135*8e3e3a7aSWarner Losh table has no metatable, so it does not need to invalidate cache */ 136*8e3e3a7aSWarner Losh setbvalue(o, 1); /* t[string] = true */ 137*8e3e3a7aSWarner Losh luaC_checkGC(L); 138*8e3e3a7aSWarner Losh } 139*8e3e3a7aSWarner Losh else { /* string already present */ 140*8e3e3a7aSWarner Losh ts = tsvalue(keyfromval(o)); /* re-use value previously stored */ 141*8e3e3a7aSWarner Losh } 142*8e3e3a7aSWarner Losh L->top--; /* remove string from stack */ 143*8e3e3a7aSWarner Losh return ts; 144*8e3e3a7aSWarner Losh } 145*8e3e3a7aSWarner Losh 146*8e3e3a7aSWarner Losh 147*8e3e3a7aSWarner Losh /* 148*8e3e3a7aSWarner Losh ** increment line number and skips newline sequence (any of 149*8e3e3a7aSWarner Losh ** \n, \r, \n\r, or \r\n) 150*8e3e3a7aSWarner Losh */ 151*8e3e3a7aSWarner Losh static void inclinenumber (LexState *ls) { 152*8e3e3a7aSWarner Losh int old = ls->current; 153*8e3e3a7aSWarner Losh lua_assert(currIsNewline(ls)); 154*8e3e3a7aSWarner Losh next(ls); /* skip '\n' or '\r' */ 155*8e3e3a7aSWarner Losh if (currIsNewline(ls) && ls->current != old) 156*8e3e3a7aSWarner Losh next(ls); /* skip '\n\r' or '\r\n' */ 157*8e3e3a7aSWarner Losh if (++ls->linenumber >= MAX_INT) 158*8e3e3a7aSWarner Losh lexerror(ls, "chunk has too many lines", 0); 159*8e3e3a7aSWarner Losh } 160*8e3e3a7aSWarner Losh 161*8e3e3a7aSWarner Losh 162*8e3e3a7aSWarner Losh void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source, 163*8e3e3a7aSWarner Losh int firstchar) { 164*8e3e3a7aSWarner Losh ls->t.token = 0; 165*8e3e3a7aSWarner Losh ls->L = L; 166*8e3e3a7aSWarner Losh ls->current = firstchar; 167*8e3e3a7aSWarner Losh ls->lookahead.token = TK_EOS; /* no look-ahead token */ 168*8e3e3a7aSWarner Losh ls->z = z; 169*8e3e3a7aSWarner Losh ls->fs = NULL; 170*8e3e3a7aSWarner Losh ls->linenumber = 1; 171*8e3e3a7aSWarner Losh ls->lastline = 1; 172*8e3e3a7aSWarner Losh ls->source = source; 173*8e3e3a7aSWarner Losh ls->envn = luaS_newliteral(L, LUA_ENV); /* get env name */ 174*8e3e3a7aSWarner Losh luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ 175*8e3e3a7aSWarner Losh } 176*8e3e3a7aSWarner Losh 177*8e3e3a7aSWarner Losh 178*8e3e3a7aSWarner Losh 179*8e3e3a7aSWarner Losh /* 180*8e3e3a7aSWarner Losh ** ======================================================= 181*8e3e3a7aSWarner Losh ** LEXICAL ANALYZER 182*8e3e3a7aSWarner Losh ** ======================================================= 183*8e3e3a7aSWarner Losh */ 184*8e3e3a7aSWarner Losh 185*8e3e3a7aSWarner Losh 186*8e3e3a7aSWarner Losh static int check_next1 (LexState *ls, int c) { 187*8e3e3a7aSWarner Losh if (ls->current == c) { 188*8e3e3a7aSWarner Losh next(ls); 189*8e3e3a7aSWarner Losh return 1; 190*8e3e3a7aSWarner Losh } 191*8e3e3a7aSWarner Losh else return 0; 192*8e3e3a7aSWarner Losh } 193*8e3e3a7aSWarner Losh 194*8e3e3a7aSWarner Losh 195*8e3e3a7aSWarner Losh /* 196*8e3e3a7aSWarner Losh ** Check whether current char is in set 'set' (with two chars) and 197*8e3e3a7aSWarner Losh ** saves it 198*8e3e3a7aSWarner Losh */ 199*8e3e3a7aSWarner Losh static int check_next2 (LexState *ls, const char *set) { 200*8e3e3a7aSWarner Losh lua_assert(set[2] == '\0'); 201*8e3e3a7aSWarner Losh if (ls->current == set[0] || ls->current == set[1]) { 202*8e3e3a7aSWarner Losh save_and_next(ls); 203*8e3e3a7aSWarner Losh return 1; 204*8e3e3a7aSWarner Losh } 205*8e3e3a7aSWarner Losh else return 0; 206*8e3e3a7aSWarner Losh } 207*8e3e3a7aSWarner Losh 208*8e3e3a7aSWarner Losh 209*8e3e3a7aSWarner Losh /* LUA_NUMBER */ 210*8e3e3a7aSWarner Losh /* 211*8e3e3a7aSWarner Losh ** this function is quite liberal in what it accepts, as 'luaO_str2num' 212*8e3e3a7aSWarner Losh ** will reject ill-formed numerals. 213*8e3e3a7aSWarner Losh */ 214*8e3e3a7aSWarner Losh static int read_numeral (LexState *ls, SemInfo *seminfo) { 215*8e3e3a7aSWarner Losh TValue obj; 216*8e3e3a7aSWarner Losh const char *expo = "Ee"; 217*8e3e3a7aSWarner Losh int first = ls->current; 218*8e3e3a7aSWarner Losh lua_assert(lisdigit(ls->current)); 219*8e3e3a7aSWarner Losh save_and_next(ls); 220*8e3e3a7aSWarner Losh if (first == '0' && check_next2(ls, "xX")) /* hexadecimal? */ 221*8e3e3a7aSWarner Losh expo = "Pp"; 222*8e3e3a7aSWarner Losh for (;;) { 223*8e3e3a7aSWarner Losh if (check_next2(ls, expo)) /* exponent part? */ 224*8e3e3a7aSWarner Losh check_next2(ls, "-+"); /* optional exponent sign */ 225*8e3e3a7aSWarner Losh if (lisxdigit(ls->current)) 226*8e3e3a7aSWarner Losh save_and_next(ls); 227*8e3e3a7aSWarner Losh else if (ls->current == '.') 228*8e3e3a7aSWarner Losh save_and_next(ls); 229*8e3e3a7aSWarner Losh else break; 230*8e3e3a7aSWarner Losh } 231*8e3e3a7aSWarner Losh save(ls, '\0'); 232*8e3e3a7aSWarner Losh if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0) /* format error? */ 233*8e3e3a7aSWarner Losh lexerror(ls, "malformed number", TK_FLT); 234*8e3e3a7aSWarner Losh if (ttisinteger(&obj)) { 235*8e3e3a7aSWarner Losh seminfo->i = ivalue(&obj); 236*8e3e3a7aSWarner Losh return TK_INT; 237*8e3e3a7aSWarner Losh } 238*8e3e3a7aSWarner Losh else { 239*8e3e3a7aSWarner Losh lua_assert(ttisfloat(&obj)); 240*8e3e3a7aSWarner Losh seminfo->r = fltvalue(&obj); 241*8e3e3a7aSWarner Losh return TK_FLT; 242*8e3e3a7aSWarner Losh } 243*8e3e3a7aSWarner Losh } 244*8e3e3a7aSWarner Losh 245*8e3e3a7aSWarner Losh 246*8e3e3a7aSWarner Losh /* 247*8e3e3a7aSWarner Losh ** skip a sequence '[=*[' or ']=*]'; if sequence is well formed, return 248*8e3e3a7aSWarner Losh ** its number of '='s; otherwise, return a negative number (-1 iff there 249*8e3e3a7aSWarner Losh ** are no '='s after initial bracket) 250*8e3e3a7aSWarner Losh */ 251*8e3e3a7aSWarner Losh static int skip_sep (LexState *ls) { 252*8e3e3a7aSWarner Losh int count = 0; 253*8e3e3a7aSWarner Losh int s = ls->current; 254*8e3e3a7aSWarner Losh lua_assert(s == '[' || s == ']'); 255*8e3e3a7aSWarner Losh save_and_next(ls); 256*8e3e3a7aSWarner Losh while (ls->current == '=') { 257*8e3e3a7aSWarner Losh save_and_next(ls); 258*8e3e3a7aSWarner Losh count++; 259*8e3e3a7aSWarner Losh } 260*8e3e3a7aSWarner Losh return (ls->current == s) ? count : (-count) - 1; 261*8e3e3a7aSWarner Losh } 262*8e3e3a7aSWarner Losh 263*8e3e3a7aSWarner Losh 264*8e3e3a7aSWarner Losh static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { 265*8e3e3a7aSWarner Losh int line = ls->linenumber; /* initial line (for error message) */ 266*8e3e3a7aSWarner Losh save_and_next(ls); /* skip 2nd '[' */ 267*8e3e3a7aSWarner Losh if (currIsNewline(ls)) /* string starts with a newline? */ 268*8e3e3a7aSWarner Losh inclinenumber(ls); /* skip it */ 269*8e3e3a7aSWarner Losh for (;;) { 270*8e3e3a7aSWarner Losh switch (ls->current) { 271*8e3e3a7aSWarner Losh case EOZ: { /* error */ 272*8e3e3a7aSWarner Losh const char *what = (seminfo ? "string" : "comment"); 273*8e3e3a7aSWarner Losh const char *msg = luaO_pushfstring(ls->L, 274*8e3e3a7aSWarner Losh "unfinished long %s (starting at line %d)", what, line); 275*8e3e3a7aSWarner Losh lexerror(ls, msg, TK_EOS); 276*8e3e3a7aSWarner Losh break; /* to avoid warnings */ 277*8e3e3a7aSWarner Losh } 278*8e3e3a7aSWarner Losh case ']': { 279*8e3e3a7aSWarner Losh if (skip_sep(ls) == sep) { 280*8e3e3a7aSWarner Losh save_and_next(ls); /* skip 2nd ']' */ 281*8e3e3a7aSWarner Losh goto endloop; 282*8e3e3a7aSWarner Losh } 283*8e3e3a7aSWarner Losh break; 284*8e3e3a7aSWarner Losh } 285*8e3e3a7aSWarner Losh case '\n': case '\r': { 286*8e3e3a7aSWarner Losh save(ls, '\n'); 287*8e3e3a7aSWarner Losh inclinenumber(ls); 288*8e3e3a7aSWarner Losh if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ 289*8e3e3a7aSWarner Losh break; 290*8e3e3a7aSWarner Losh } 291*8e3e3a7aSWarner Losh default: { 292*8e3e3a7aSWarner Losh if (seminfo) save_and_next(ls); 293*8e3e3a7aSWarner Losh else next(ls); 294*8e3e3a7aSWarner Losh } 295*8e3e3a7aSWarner Losh } 296*8e3e3a7aSWarner Losh } endloop: 297*8e3e3a7aSWarner Losh if (seminfo) 298*8e3e3a7aSWarner Losh seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), 299*8e3e3a7aSWarner Losh luaZ_bufflen(ls->buff) - 2*(2 + sep)); 300*8e3e3a7aSWarner Losh } 301*8e3e3a7aSWarner Losh 302*8e3e3a7aSWarner Losh 303*8e3e3a7aSWarner Losh static void esccheck (LexState *ls, int c, const char *msg) { 304*8e3e3a7aSWarner Losh if (!c) { 305*8e3e3a7aSWarner Losh if (ls->current != EOZ) 306*8e3e3a7aSWarner Losh save_and_next(ls); /* add current to buffer for error message */ 307*8e3e3a7aSWarner Losh lexerror(ls, msg, TK_STRING); 308*8e3e3a7aSWarner Losh } 309*8e3e3a7aSWarner Losh } 310*8e3e3a7aSWarner Losh 311*8e3e3a7aSWarner Losh 312*8e3e3a7aSWarner Losh static int gethexa (LexState *ls) { 313*8e3e3a7aSWarner Losh save_and_next(ls); 314*8e3e3a7aSWarner Losh esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected"); 315*8e3e3a7aSWarner Losh return luaO_hexavalue(ls->current); 316*8e3e3a7aSWarner Losh } 317*8e3e3a7aSWarner Losh 318*8e3e3a7aSWarner Losh 319*8e3e3a7aSWarner Losh static int readhexaesc (LexState *ls) { 320*8e3e3a7aSWarner Losh int r = gethexa(ls); 321*8e3e3a7aSWarner Losh r = (r << 4) + gethexa(ls); 322*8e3e3a7aSWarner Losh luaZ_buffremove(ls->buff, 2); /* remove saved chars from buffer */ 323*8e3e3a7aSWarner Losh return r; 324*8e3e3a7aSWarner Losh } 325*8e3e3a7aSWarner Losh 326*8e3e3a7aSWarner Losh 327*8e3e3a7aSWarner Losh static unsigned long readutf8esc (LexState *ls) { 328*8e3e3a7aSWarner Losh unsigned long r; 329*8e3e3a7aSWarner Losh int i = 4; /* chars to be removed: '\', 'u', '{', and first digit */ 330*8e3e3a7aSWarner Losh save_and_next(ls); /* skip 'u' */ 331*8e3e3a7aSWarner Losh esccheck(ls, ls->current == '{', "missing '{'"); 332*8e3e3a7aSWarner Losh r = gethexa(ls); /* must have at least one digit */ 333*8e3e3a7aSWarner Losh while ((save_and_next(ls), lisxdigit(ls->current))) { 334*8e3e3a7aSWarner Losh i++; 335*8e3e3a7aSWarner Losh r = (r << 4) + luaO_hexavalue(ls->current); 336*8e3e3a7aSWarner Losh esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large"); 337*8e3e3a7aSWarner Losh } 338*8e3e3a7aSWarner Losh esccheck(ls, ls->current == '}', "missing '}'"); 339*8e3e3a7aSWarner Losh next(ls); /* skip '}' */ 340*8e3e3a7aSWarner Losh luaZ_buffremove(ls->buff, i); /* remove saved chars from buffer */ 341*8e3e3a7aSWarner Losh return r; 342*8e3e3a7aSWarner Losh } 343*8e3e3a7aSWarner Losh 344*8e3e3a7aSWarner Losh 345*8e3e3a7aSWarner Losh static void utf8esc (LexState *ls) { 346*8e3e3a7aSWarner Losh char buff[UTF8BUFFSZ]; 347*8e3e3a7aSWarner Losh int n = luaO_utf8esc(buff, readutf8esc(ls)); 348*8e3e3a7aSWarner Losh for (; n > 0; n--) /* add 'buff' to string */ 349*8e3e3a7aSWarner Losh save(ls, buff[UTF8BUFFSZ - n]); 350*8e3e3a7aSWarner Losh } 351*8e3e3a7aSWarner Losh 352*8e3e3a7aSWarner Losh 353*8e3e3a7aSWarner Losh static int readdecesc (LexState *ls) { 354*8e3e3a7aSWarner Losh int i; 355*8e3e3a7aSWarner Losh int r = 0; /* result accumulator */ 356*8e3e3a7aSWarner Losh for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */ 357*8e3e3a7aSWarner Losh r = 10*r + ls->current - '0'; 358*8e3e3a7aSWarner Losh save_and_next(ls); 359*8e3e3a7aSWarner Losh } 360*8e3e3a7aSWarner Losh esccheck(ls, r <= UCHAR_MAX, "decimal escape too large"); 361*8e3e3a7aSWarner Losh luaZ_buffremove(ls->buff, i); /* remove read digits from buffer */ 362*8e3e3a7aSWarner Losh return r; 363*8e3e3a7aSWarner Losh } 364*8e3e3a7aSWarner Losh 365*8e3e3a7aSWarner Losh 366*8e3e3a7aSWarner Losh static void read_string (LexState *ls, int del, SemInfo *seminfo) { 367*8e3e3a7aSWarner Losh save_and_next(ls); /* keep delimiter (for error messages) */ 368*8e3e3a7aSWarner Losh while (ls->current != del) { 369*8e3e3a7aSWarner Losh switch (ls->current) { 370*8e3e3a7aSWarner Losh case EOZ: 371*8e3e3a7aSWarner Losh lexerror(ls, "unfinished string", TK_EOS); 372*8e3e3a7aSWarner Losh break; /* to avoid warnings */ 373*8e3e3a7aSWarner Losh case '\n': 374*8e3e3a7aSWarner Losh case '\r': 375*8e3e3a7aSWarner Losh lexerror(ls, "unfinished string", TK_STRING); 376*8e3e3a7aSWarner Losh break; /* to avoid warnings */ 377*8e3e3a7aSWarner Losh case '\\': { /* escape sequences */ 378*8e3e3a7aSWarner Losh int c; /* final character to be saved */ 379*8e3e3a7aSWarner Losh save_and_next(ls); /* keep '\\' for error messages */ 380*8e3e3a7aSWarner Losh switch (ls->current) { 381*8e3e3a7aSWarner Losh case 'a': c = '\a'; goto read_save; 382*8e3e3a7aSWarner Losh case 'b': c = '\b'; goto read_save; 383*8e3e3a7aSWarner Losh case 'f': c = '\f'; goto read_save; 384*8e3e3a7aSWarner Losh case 'n': c = '\n'; goto read_save; 385*8e3e3a7aSWarner Losh case 'r': c = '\r'; goto read_save; 386*8e3e3a7aSWarner Losh case 't': c = '\t'; goto read_save; 387*8e3e3a7aSWarner Losh case 'v': c = '\v'; goto read_save; 388*8e3e3a7aSWarner Losh case 'x': c = readhexaesc(ls); goto read_save; 389*8e3e3a7aSWarner Losh case 'u': utf8esc(ls); goto no_save; 390*8e3e3a7aSWarner Losh case '\n': case '\r': 391*8e3e3a7aSWarner Losh inclinenumber(ls); c = '\n'; goto only_save; 392*8e3e3a7aSWarner Losh case '\\': case '\"': case '\'': 393*8e3e3a7aSWarner Losh c = ls->current; goto read_save; 394*8e3e3a7aSWarner Losh case EOZ: goto no_save; /* will raise an error next loop */ 395*8e3e3a7aSWarner Losh case 'z': { /* zap following span of spaces */ 396*8e3e3a7aSWarner Losh luaZ_buffremove(ls->buff, 1); /* remove '\\' */ 397*8e3e3a7aSWarner Losh next(ls); /* skip the 'z' */ 398*8e3e3a7aSWarner Losh while (lisspace(ls->current)) { 399*8e3e3a7aSWarner Losh if (currIsNewline(ls)) inclinenumber(ls); 400*8e3e3a7aSWarner Losh else next(ls); 401*8e3e3a7aSWarner Losh } 402*8e3e3a7aSWarner Losh goto no_save; 403*8e3e3a7aSWarner Losh } 404*8e3e3a7aSWarner Losh default: { 405*8e3e3a7aSWarner Losh esccheck(ls, lisdigit(ls->current), "invalid escape sequence"); 406*8e3e3a7aSWarner Losh c = readdecesc(ls); /* digital escape '\ddd' */ 407*8e3e3a7aSWarner Losh goto only_save; 408*8e3e3a7aSWarner Losh } 409*8e3e3a7aSWarner Losh } 410*8e3e3a7aSWarner Losh read_save: 411*8e3e3a7aSWarner Losh next(ls); 412*8e3e3a7aSWarner Losh /* go through */ 413*8e3e3a7aSWarner Losh only_save: 414*8e3e3a7aSWarner Losh luaZ_buffremove(ls->buff, 1); /* remove '\\' */ 415*8e3e3a7aSWarner Losh save(ls, c); 416*8e3e3a7aSWarner Losh /* go through */ 417*8e3e3a7aSWarner Losh no_save: break; 418*8e3e3a7aSWarner Losh } 419*8e3e3a7aSWarner Losh default: 420*8e3e3a7aSWarner Losh save_and_next(ls); 421*8e3e3a7aSWarner Losh } 422*8e3e3a7aSWarner Losh } 423*8e3e3a7aSWarner Losh save_and_next(ls); /* skip delimiter */ 424*8e3e3a7aSWarner Losh seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, 425*8e3e3a7aSWarner Losh luaZ_bufflen(ls->buff) - 2); 426*8e3e3a7aSWarner Losh } 427*8e3e3a7aSWarner Losh 428*8e3e3a7aSWarner Losh 429*8e3e3a7aSWarner Losh static int llex (LexState *ls, SemInfo *seminfo) { 430*8e3e3a7aSWarner Losh luaZ_resetbuffer(ls->buff); 431*8e3e3a7aSWarner Losh for (;;) { 432*8e3e3a7aSWarner Losh switch (ls->current) { 433*8e3e3a7aSWarner Losh case '\n': case '\r': { /* line breaks */ 434*8e3e3a7aSWarner Losh inclinenumber(ls); 435*8e3e3a7aSWarner Losh break; 436*8e3e3a7aSWarner Losh } 437*8e3e3a7aSWarner Losh case ' ': case '\f': case '\t': case '\v': { /* spaces */ 438*8e3e3a7aSWarner Losh next(ls); 439*8e3e3a7aSWarner Losh break; 440*8e3e3a7aSWarner Losh } 441*8e3e3a7aSWarner Losh case '-': { /* '-' or '--' (comment) */ 442*8e3e3a7aSWarner Losh next(ls); 443*8e3e3a7aSWarner Losh if (ls->current != '-') return '-'; 444*8e3e3a7aSWarner Losh /* else is a comment */ 445*8e3e3a7aSWarner Losh next(ls); 446*8e3e3a7aSWarner Losh if (ls->current == '[') { /* long comment? */ 447*8e3e3a7aSWarner Losh int sep = skip_sep(ls); 448*8e3e3a7aSWarner Losh luaZ_resetbuffer(ls->buff); /* 'skip_sep' may dirty the buffer */ 449*8e3e3a7aSWarner Losh if (sep >= 0) { 450*8e3e3a7aSWarner Losh read_long_string(ls, NULL, sep); /* skip long comment */ 451*8e3e3a7aSWarner Losh luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */ 452*8e3e3a7aSWarner Losh break; 453*8e3e3a7aSWarner Losh } 454*8e3e3a7aSWarner Losh } 455*8e3e3a7aSWarner Losh /* else short comment */ 456*8e3e3a7aSWarner Losh while (!currIsNewline(ls) && ls->current != EOZ) 457*8e3e3a7aSWarner Losh next(ls); /* skip until end of line (or end of file) */ 458*8e3e3a7aSWarner Losh break; 459*8e3e3a7aSWarner Losh } 460*8e3e3a7aSWarner Losh case '[': { /* long string or simply '[' */ 461*8e3e3a7aSWarner Losh int sep = skip_sep(ls); 462*8e3e3a7aSWarner Losh if (sep >= 0) { 463*8e3e3a7aSWarner Losh read_long_string(ls, seminfo, sep); 464*8e3e3a7aSWarner Losh return TK_STRING; 465*8e3e3a7aSWarner Losh } 466*8e3e3a7aSWarner Losh else if (sep != -1) /* '[=...' missing second bracket */ 467*8e3e3a7aSWarner Losh lexerror(ls, "invalid long string delimiter", TK_STRING); 468*8e3e3a7aSWarner Losh return '['; 469*8e3e3a7aSWarner Losh } 470*8e3e3a7aSWarner Losh case '=': { 471*8e3e3a7aSWarner Losh next(ls); 472*8e3e3a7aSWarner Losh if (check_next1(ls, '=')) return TK_EQ; 473*8e3e3a7aSWarner Losh else return '='; 474*8e3e3a7aSWarner Losh } 475*8e3e3a7aSWarner Losh case '<': { 476*8e3e3a7aSWarner Losh next(ls); 477*8e3e3a7aSWarner Losh if (check_next1(ls, '=')) return TK_LE; 478*8e3e3a7aSWarner Losh else if (check_next1(ls, '<')) return TK_SHL; 479*8e3e3a7aSWarner Losh else return '<'; 480*8e3e3a7aSWarner Losh } 481*8e3e3a7aSWarner Losh case '>': { 482*8e3e3a7aSWarner Losh next(ls); 483*8e3e3a7aSWarner Losh if (check_next1(ls, '=')) return TK_GE; 484*8e3e3a7aSWarner Losh else if (check_next1(ls, '>')) return TK_SHR; 485*8e3e3a7aSWarner Losh else return '>'; 486*8e3e3a7aSWarner Losh } 487*8e3e3a7aSWarner Losh case '/': { 488*8e3e3a7aSWarner Losh next(ls); 489*8e3e3a7aSWarner Losh if (check_next1(ls, '/')) return TK_IDIV; 490*8e3e3a7aSWarner Losh else return '/'; 491*8e3e3a7aSWarner Losh } 492*8e3e3a7aSWarner Losh case '~': { 493*8e3e3a7aSWarner Losh next(ls); 494*8e3e3a7aSWarner Losh if (check_next1(ls, '=')) return TK_NE; 495*8e3e3a7aSWarner Losh else return '~'; 496*8e3e3a7aSWarner Losh } 497*8e3e3a7aSWarner Losh case ':': { 498*8e3e3a7aSWarner Losh next(ls); 499*8e3e3a7aSWarner Losh if (check_next1(ls, ':')) return TK_DBCOLON; 500*8e3e3a7aSWarner Losh else return ':'; 501*8e3e3a7aSWarner Losh } 502*8e3e3a7aSWarner Losh case '"': case '\'': { /* short literal strings */ 503*8e3e3a7aSWarner Losh read_string(ls, ls->current, seminfo); 504*8e3e3a7aSWarner Losh return TK_STRING; 505*8e3e3a7aSWarner Losh } 506*8e3e3a7aSWarner Losh case '.': { /* '.', '..', '...', or number */ 507*8e3e3a7aSWarner Losh save_and_next(ls); 508*8e3e3a7aSWarner Losh if (check_next1(ls, '.')) { 509*8e3e3a7aSWarner Losh if (check_next1(ls, '.')) 510*8e3e3a7aSWarner Losh return TK_DOTS; /* '...' */ 511*8e3e3a7aSWarner Losh else return TK_CONCAT; /* '..' */ 512*8e3e3a7aSWarner Losh } 513*8e3e3a7aSWarner Losh else if (!lisdigit(ls->current)) return '.'; 514*8e3e3a7aSWarner Losh else return read_numeral(ls, seminfo); 515*8e3e3a7aSWarner Losh } 516*8e3e3a7aSWarner Losh case '0': case '1': case '2': case '3': case '4': 517*8e3e3a7aSWarner Losh case '5': case '6': case '7': case '8': case '9': { 518*8e3e3a7aSWarner Losh return read_numeral(ls, seminfo); 519*8e3e3a7aSWarner Losh } 520*8e3e3a7aSWarner Losh case EOZ: { 521*8e3e3a7aSWarner Losh return TK_EOS; 522*8e3e3a7aSWarner Losh } 523*8e3e3a7aSWarner Losh default: { 524*8e3e3a7aSWarner Losh if (lislalpha(ls->current)) { /* identifier or reserved word? */ 525*8e3e3a7aSWarner Losh TString *ts; 526*8e3e3a7aSWarner Losh do { 527*8e3e3a7aSWarner Losh save_and_next(ls); 528*8e3e3a7aSWarner Losh } while (lislalnum(ls->current)); 529*8e3e3a7aSWarner Losh ts = luaX_newstring(ls, luaZ_buffer(ls->buff), 530*8e3e3a7aSWarner Losh luaZ_bufflen(ls->buff)); 531*8e3e3a7aSWarner Losh seminfo->ts = ts; 532*8e3e3a7aSWarner Losh if (isreserved(ts)) /* reserved word? */ 533*8e3e3a7aSWarner Losh return ts->extra - 1 + FIRST_RESERVED; 534*8e3e3a7aSWarner Losh else { 535*8e3e3a7aSWarner Losh return TK_NAME; 536*8e3e3a7aSWarner Losh } 537*8e3e3a7aSWarner Losh } 538*8e3e3a7aSWarner Losh else { /* single-char tokens (+ - / ...) */ 539*8e3e3a7aSWarner Losh int c = ls->current; 540*8e3e3a7aSWarner Losh next(ls); 541*8e3e3a7aSWarner Losh return c; 542*8e3e3a7aSWarner Losh } 543*8e3e3a7aSWarner Losh } 544*8e3e3a7aSWarner Losh } 545*8e3e3a7aSWarner Losh } 546*8e3e3a7aSWarner Losh } 547*8e3e3a7aSWarner Losh 548*8e3e3a7aSWarner Losh 549*8e3e3a7aSWarner Losh void luaX_next (LexState *ls) { 550*8e3e3a7aSWarner Losh ls->lastline = ls->linenumber; 551*8e3e3a7aSWarner Losh if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ 552*8e3e3a7aSWarner Losh ls->t = ls->lookahead; /* use this one */ 553*8e3e3a7aSWarner Losh ls->lookahead.token = TK_EOS; /* and discharge it */ 554*8e3e3a7aSWarner Losh } 555*8e3e3a7aSWarner Losh else 556*8e3e3a7aSWarner Losh ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ 557*8e3e3a7aSWarner Losh } 558*8e3e3a7aSWarner Losh 559*8e3e3a7aSWarner Losh 560*8e3e3a7aSWarner Losh int luaX_lookahead (LexState *ls) { 561*8e3e3a7aSWarner Losh lua_assert(ls->lookahead.token == TK_EOS); 562*8e3e3a7aSWarner Losh ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); 563*8e3e3a7aSWarner Losh return ls->lookahead.token; 564*8e3e3a7aSWarner Losh } 565*8e3e3a7aSWarner Losh 566