xref: /freebsd/contrib/lua/src/llex.c (revision a9490b81b032b43cdb3c8c76b4d01bbad9ff82c1)
18e3e3a7aSWarner Losh /*
20495ed39SKyle Evans ** $Id: llex.c $
38e3e3a7aSWarner Losh ** Lexical Analyzer
48e3e3a7aSWarner Losh ** See Copyright Notice in lua.h
58e3e3a7aSWarner Losh */
68e3e3a7aSWarner Losh 
78e3e3a7aSWarner Losh #define llex_c
88e3e3a7aSWarner Losh #define LUA_CORE
98e3e3a7aSWarner Losh 
108e3e3a7aSWarner Losh #include "lprefix.h"
118e3e3a7aSWarner Losh 
128e3e3a7aSWarner Losh 
138e3e3a7aSWarner Losh #include <locale.h>
148e3e3a7aSWarner Losh #include <string.h>
158e3e3a7aSWarner Losh 
168e3e3a7aSWarner Losh #include "lua.h"
178e3e3a7aSWarner Losh 
188e3e3a7aSWarner Losh #include "lctype.h"
198e3e3a7aSWarner Losh #include "ldebug.h"
208e3e3a7aSWarner Losh #include "ldo.h"
218e3e3a7aSWarner Losh #include "lgc.h"
228e3e3a7aSWarner Losh #include "llex.h"
238e3e3a7aSWarner Losh #include "lobject.h"
248e3e3a7aSWarner Losh #include "lparser.h"
258e3e3a7aSWarner Losh #include "lstate.h"
268e3e3a7aSWarner Losh #include "lstring.h"
278e3e3a7aSWarner Losh #include "ltable.h"
288e3e3a7aSWarner Losh #include "lzio.h"
298e3e3a7aSWarner Losh 
308e3e3a7aSWarner Losh 
318e3e3a7aSWarner Losh 
/*
** Read the next character from the input stream 'ls->z' into
** 'ls->current'; the expression yields the character just read.
** The parameter is parenthesized so that any pointer expression
** (e.g. '&state') can be passed safely.
*/
#define next(ls)	((ls)->current = zgetc((ls)->z))


/* True when the current character is a line break ('\n' or '\r'). */
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
378e3e3a7aSWarner Losh 
388e3e3a7aSWarner Losh 
/*
** Printable names of the tokens, indexed by (token - FIRST_RESERVED).
** The order of the entries is significant.
** ORDER RESERVED
*/
static const char *const luaX_tokens [] = {
    /* words */
    "and", "break", "do", "else", "elseif", "end", "false", "for",
    "function", "goto", "if", "in", "local", "nil", "not", "or",
    "repeat", "return", "then", "true", "until", "while",
    /* multi-character symbols */
    "//", "..", "...", "==", ">=", "<=", "~=", "<<", ">>", "::",
    /* end of stream */
    "<eof>",
    /* numerals, names, and strings */
    "<number>", "<integer>", "<name>", "<string>"
};
498e3e3a7aSWarner Losh 
508e3e3a7aSWarner Losh 
518e3e3a7aSWarner Losh #define save_and_next(ls) (save(ls, ls->current), next(ls))
528e3e3a7aSWarner Losh 
538e3e3a7aSWarner Losh 
548e3e3a7aSWarner Losh static l_noret lexerror (LexState *ls, const char *msg, int token);
558e3e3a7aSWarner Losh 
568e3e3a7aSWarner Losh 
/*
** Append character 'c' to the token buffer 'ls->buff'. When the buffer
** is full its size is doubled; if the size has already reached
** MAX_SIZE/2, a "lexical element too long" error is raised instead.
*/
static void save (LexState *ls, int c) {
  Mbuffer *buf = ls->buff;
  if (luaZ_bufflen(buf) + 1 > luaZ_sizebuffer(buf)) {  /* no room left? */
    size_t doubled;
    if (luaZ_sizebuffer(buf) >= MAX_SIZE/2)  /* cannot double it? */
      lexerror(ls, "lexical element too long", 0);
    doubled = luaZ_sizebuffer(buf) * 2;
    luaZ_resizebuffer(ls->L, buf, doubled);
  }
  buf->buffer[luaZ_bufflen(buf)] = cast_char(c);
  luaZ_bufflen(buf)++;  /* one more character in use */
}
688e3e3a7aSWarner Losh 
698e3e3a7aSWarner Losh 
/*
** Create the strings the lexer relies on: the environment name
** (LUA_ENV) and the first NUM_RESERVED entries of 'luaX_tokens'. All of
** them are fixed so that they are never collected. Each reserved word
** gets its 'extra' field set to its 1-based position in 'luaX_tokens'.
*/
void luaX_init (lua_State *L) {
  TString *envname = luaS_newliteral(L, LUA_ENV);  /* create env name */
  int idx = 0;
  luaC_fix(L, obj2gco(envname));  /* never collect this name */
  while (idx < NUM_RESERVED) {
    TString *word = luaS_new(L, luaX_tokens[idx]);
    luaC_fix(L, obj2gco(word));  /* reserved words are never collected */
    idx++;
    word->extra = cast_byte(idx);  /* non-zero marks a reserved word */
  }
}
808e3e3a7aSWarner Losh 
818e3e3a7aSWarner Losh 
/*
** Return a printable description of 'token'.
**   - tokens below FIRST_RESERVED are single bytes: printable ones are
**     shown quoted, the others as '<\N>' with N their numeric code;
**   - tokens below TK_EOS have a fixed spelling, shown quoted;
**   - the remaining tokens use their generic name from 'luaX_tokens'
**     as is (no new string is created for them).
*/
const char *luaX_token2str (LexState *ls, int token) {
  if (token >= FIRST_RESERVED) {
    const char *name = luaX_tokens[token - FIRST_RESERVED];
    if (token >= TK_EOS)  /* names, strings, and numerals */
      return name;
    /* fixed format (symbols and reserved words) */
    return luaO_pushfstring(ls->L, "'%s'", name);
  }
  /* single-byte symbols */
  if (!lisprint(token))  /* control character */
    return luaO_pushfstring(ls->L, "'<\\%d>'", token);
  return luaO_pushfstring(ls->L, "'%c'", token);
}
978e3e3a7aSWarner Losh 
988e3e3a7aSWarner Losh 
/*
** Text used to describe 'token' in an error message. For names,
** strings, and numerals it is the (quoted) current content of the
** token buffer; any other token is described by 'luaX_token2str'.
*/
static const char *txtToken (LexState *ls, int token) {
  if (token == TK_NAME || token == TK_STRING ||
      token == TK_FLT || token == TK_INT) {
    save(ls, '\0');  /* terminate buffer so it can be used as a C string */
    return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
  }
  return luaX_token2str(ls, token);
}
1098e3e3a7aSWarner Losh 
1108e3e3a7aSWarner Losh 
lexerror(LexState * ls,const char * msg,int token)1118e3e3a7aSWarner Losh static l_noret lexerror (LexState *ls, const char *msg, int token) {
1128e3e3a7aSWarner Losh   msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
1138e3e3a7aSWarner Losh   if (token)
1148e3e3a7aSWarner Losh     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
1158e3e3a7aSWarner Losh   luaD_throw(ls->L, LUA_ERRSYNTAX);
1168e3e3a7aSWarner Losh }
1178e3e3a7aSWarner Losh 
1188e3e3a7aSWarner Losh 
luaX_syntaxerror(LexState * ls,const char * msg)1198e3e3a7aSWarner Losh l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
1208e3e3a7aSWarner Losh   lexerror(ls, msg, ls->t.token);
1218e3e3a7aSWarner Losh }
1228e3e3a7aSWarner Losh 
1238e3e3a7aSWarner Losh 
1248e3e3a7aSWarner Losh /*
1258c784bb8SWarner Losh ** Creates a new string and anchors it in scanner's table so that it
1268c784bb8SWarner Losh ** will not be collected until the end of the compilation; by that time
1278c784bb8SWarner Losh ** it should be anchored somewhere. It also internalizes long strings,
1288c784bb8SWarner Losh ** ensuring there is only one copy of each unique string.  The table
1298c784bb8SWarner Losh ** here is used as a set: the string enters as the key, while its value
1308c784bb8SWarner Losh ** is irrelevant. We use the string itself as the value only because it
131*a9490b81SWarner Losh ** is a TValue readily available. Later, the code generation can change
1328c784bb8SWarner Losh ** this value.
1338e3e3a7aSWarner Losh */
luaX_newstring(LexState * ls,const char * str,size_t l)1348e3e3a7aSWarner Losh TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
1358e3e3a7aSWarner Losh   lua_State *L = ls->L;
1368e3e3a7aSWarner Losh   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
1378c784bb8SWarner Losh   const TValue *o = luaH_getstr(ls->h, ts);
1388c784bb8SWarner Losh   if (!ttisnil(o))  /* string already present? */
1398c784bb8SWarner Losh     ts = keystrval(nodefromval(o));  /* get saved copy */
1408c784bb8SWarner Losh   else {  /* not in use yet */
141*a9490b81SWarner Losh     TValue *stv = s2v(L->top.p++);  /* reserve stack space for string */
1428c784bb8SWarner Losh     setsvalue(L, stv, ts);  /* temporarily anchor the string */
1438c784bb8SWarner Losh     luaH_finishset(L, ls->h, stv, o, stv);  /* t[string] = string */
1448c784bb8SWarner Losh     /* table is not a metatable, so it does not need to invalidate cache */
1458e3e3a7aSWarner Losh     luaC_checkGC(L);
146*a9490b81SWarner Losh     L->top.p--;  /* remove string from stack */
1478c784bb8SWarner Losh   }
1488e3e3a7aSWarner Losh   return ts;
1498e3e3a7aSWarner Losh }
1508e3e3a7aSWarner Losh 
1518e3e3a7aSWarner Losh 
1528e3e3a7aSWarner Losh /*
1538e3e3a7aSWarner Losh ** increment line number and skips newline sequence (any of
1548e3e3a7aSWarner Losh ** \n, \r, \n\r, or \r\n)
1558e3e3a7aSWarner Losh */
inclinenumber(LexState * ls)1568e3e3a7aSWarner Losh static void inclinenumber (LexState *ls) {
1578e3e3a7aSWarner Losh   int old = ls->current;
1588e3e3a7aSWarner Losh   lua_assert(currIsNewline(ls));
1598e3e3a7aSWarner Losh   next(ls);  /* skip '\n' or '\r' */
1608e3e3a7aSWarner Losh   if (currIsNewline(ls) && ls->current != old)
1618e3e3a7aSWarner Losh     next(ls);  /* skip '\n\r' or '\r\n' */
1628e3e3a7aSWarner Losh   if (++ls->linenumber >= MAX_INT)
1638e3e3a7aSWarner Losh     lexerror(ls, "chunk has too many lines", 0);
1648e3e3a7aSWarner Losh }
1658e3e3a7aSWarner Losh 
1668e3e3a7aSWarner Losh 
/*
** Prepare 'ls' to scan the stream 'z', whose first character
** ('firstchar') has already been read. Line counters start at 1, there
** is no look-ahead token, and the token buffer gets its minimum size.
*/
void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
                    int firstchar) {
  /* where the input comes from */
  ls->L = L;
  ls->z = z;
  ls->source = source;
  ls->current = firstchar;
  /* position */
  ls->linenumber = 1;
  ls->lastline = 1;
  /* token state */
  ls->t.token = 0;
  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  ls->fs = NULL;
  ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
  luaZ_resizebuffer(L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
}
1818e3e3a7aSWarner Losh 
1828e3e3a7aSWarner Losh 
1838e3e3a7aSWarner Losh 
1848e3e3a7aSWarner Losh /*
1858e3e3a7aSWarner Losh ** =======================================================
1868e3e3a7aSWarner Losh ** LEXICAL ANALYZER
1878e3e3a7aSWarner Losh ** =======================================================
1888e3e3a7aSWarner Losh */
1898e3e3a7aSWarner Losh 
1908e3e3a7aSWarner Losh 
/*
** If the current character is 'c', skip it (without saving it) and
** return 1; otherwise leave the input untouched and return 0.
*/
static int check_next1 (LexState *ls, int c) {
  if (ls->current != c)
    return 0;
  next(ls);
  return 1;
}
1988e3e3a7aSWarner Losh 
1998e3e3a7aSWarner Losh 
2008e3e3a7aSWarner Losh /*
2018e3e3a7aSWarner Losh ** Check whether current char is in set 'set' (with two chars) and
2028e3e3a7aSWarner Losh ** saves it
2038e3e3a7aSWarner Losh */
check_next2(LexState * ls,const char * set)2048e3e3a7aSWarner Losh static int check_next2 (LexState *ls, const char *set) {
2058e3e3a7aSWarner Losh   lua_assert(set[2] == '\0');
2068e3e3a7aSWarner Losh   if (ls->current == set[0] || ls->current == set[1]) {
2078e3e3a7aSWarner Losh     save_and_next(ls);
2088e3e3a7aSWarner Losh     return 1;
2098e3e3a7aSWarner Losh   }
2108e3e3a7aSWarner Losh   else return 0;
2118e3e3a7aSWarner Losh }
2128e3e3a7aSWarner Losh 
2138e3e3a7aSWarner Losh 
2148e3e3a7aSWarner Losh /* LUA_NUMBER */
2158e3e3a7aSWarner Losh /*
2160495ed39SKyle Evans ** This function is quite liberal in what it accepts, as 'luaO_str2num'
2170495ed39SKyle Evans ** will reject ill-formed numerals. Roughly, it accepts the following
2180495ed39SKyle Evans ** pattern:
2190495ed39SKyle Evans **
2200495ed39SKyle Evans **   %d(%x|%.|([Ee][+-]?))* | 0[Xx](%x|%.|([Pp][+-]?))*
2210495ed39SKyle Evans **
2220495ed39SKyle Evans ** The only tricky part is to accept [+-] only after a valid exponent
2230495ed39SKyle Evans ** mark, to avoid reading '3-4' or '0xe+1' as a single number.
2240495ed39SKyle Evans **
2250495ed39SKyle Evans ** The caller might have already read an initial dot.
2268e3e3a7aSWarner Losh */
read_numeral(LexState * ls,SemInfo * seminfo)2278e3e3a7aSWarner Losh static int read_numeral (LexState *ls, SemInfo *seminfo) {
2288e3e3a7aSWarner Losh   TValue obj;
2298e3e3a7aSWarner Losh   const char *expo = "Ee";
2308e3e3a7aSWarner Losh   int first = ls->current;
2318e3e3a7aSWarner Losh   lua_assert(lisdigit(ls->current));
2328e3e3a7aSWarner Losh   save_and_next(ls);
2338e3e3a7aSWarner Losh   if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
2348e3e3a7aSWarner Losh     expo = "Pp";
2358e3e3a7aSWarner Losh   for (;;) {
2360495ed39SKyle Evans     if (check_next2(ls, expo))  /* exponent mark? */
2378e3e3a7aSWarner Losh       check_next2(ls, "-+");  /* optional exponent sign */
2380495ed39SKyle Evans     else if (lisxdigit(ls->current) || ls->current == '.')  /* '%x|%.' */
2398e3e3a7aSWarner Losh       save_and_next(ls);
2408e3e3a7aSWarner Losh     else break;
2418e3e3a7aSWarner Losh   }
2420495ed39SKyle Evans   if (lislalpha(ls->current))  /* is numeral touching a letter? */
2430495ed39SKyle Evans     save_and_next(ls);  /* force an error */
2448e3e3a7aSWarner Losh   save(ls, '\0');
2458e3e3a7aSWarner Losh   if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
2468e3e3a7aSWarner Losh     lexerror(ls, "malformed number", TK_FLT);
2478e3e3a7aSWarner Losh   if (ttisinteger(&obj)) {
2488e3e3a7aSWarner Losh     seminfo->i = ivalue(&obj);
2498e3e3a7aSWarner Losh     return TK_INT;
2508e3e3a7aSWarner Losh   }
2518e3e3a7aSWarner Losh   else {
2528e3e3a7aSWarner Losh     lua_assert(ttisfloat(&obj));
2538e3e3a7aSWarner Losh     seminfo->r = fltvalue(&obj);
2548e3e3a7aSWarner Losh     return TK_FLT;
2558e3e3a7aSWarner Losh   }
2568e3e3a7aSWarner Losh }
2578e3e3a7aSWarner Losh 
2588e3e3a7aSWarner Losh 
2598e3e3a7aSWarner Losh /*
2600495ed39SKyle Evans ** read a sequence '[=*[' or ']=*]', leaving the last bracket. If
2610495ed39SKyle Evans ** sequence is well formed, return its number of '='s + 2; otherwise,
2620495ed39SKyle Evans ** return 1 if it is a single bracket (no '='s and no 2nd bracket);
2630495ed39SKyle Evans ** otherwise (an unfinished '[==...') return 0.
2648e3e3a7aSWarner Losh */
skip_sep(LexState * ls)265bf9580a1SKyle Evans static size_t skip_sep (LexState *ls) {
266bf9580a1SKyle Evans   size_t count = 0;
2678e3e3a7aSWarner Losh   int s = ls->current;
2688e3e3a7aSWarner Losh   lua_assert(s == '[' || s == ']');
2698e3e3a7aSWarner Losh   save_and_next(ls);
2708e3e3a7aSWarner Losh   while (ls->current == '=') {
2718e3e3a7aSWarner Losh     save_and_next(ls);
2728e3e3a7aSWarner Losh     count++;
2738e3e3a7aSWarner Losh   }
274bf9580a1SKyle Evans   return (ls->current == s) ? count + 2
275bf9580a1SKyle Evans          : (count == 0) ? 1
276bf9580a1SKyle Evans          : 0;
2778e3e3a7aSWarner Losh }
2788e3e3a7aSWarner Losh 
2798e3e3a7aSWarner Losh 
/*
** Read a long string or long comment whose opening sequence has
** already been read by 'skip_sep' (which returned 'sep'), except for
** its second '['. With a non-NULL 'seminfo' the content is saved and
** the resulting string (without delimiters) is stored in 'seminfo->ts';
** with a NULL 'seminfo' the text is a comment and is discarded.
** Reaching the end of the stream before the closing sequence is an
** error.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) {
  int startline = ls->linenumber;  /* initial line (for error message) */
  int closed = 0;
  save_and_next(ls);  /* skip 2nd '[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  while (!closed) {
    if (ls->current == EOZ) {  /* error */
      const char *kind = (seminfo ? "string" : "comment");
      const char *msg = luaO_pushfstring(ls->L,
                   "unfinished long %s (starting at line %d)", kind, startline);
      lexerror(ls, msg, TK_EOS);
    }
    else if (ls->current == ']') {
      if (skip_sep(ls) == sep) {  /* matching closing sequence? */
        save_and_next(ls);  /* skip 2nd ']' */
        closed = 1;
      }
    }
    else if (currIsNewline(ls)) {
      save(ls, '\n');  /* any newline sequence is saved as '\n' */
      inclinenumber(ls);
      if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
    }
    else if (seminfo)
      save_and_next(ls);
    else
      next(ls);
  }
  if (seminfo)
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
                                     luaZ_bufflen(ls->buff) - 2 * sep);
}
3178e3e3a7aSWarner Losh 
3188e3e3a7aSWarner Losh 
/*
** Check a condition inside an escape sequence: when 'c' is false,
** raise a lexical error with message 'msg'. Unless the input is at its
** end, the offending character is first added to the buffer so that it
** shows up in the error message.
*/
static void esccheck (LexState *ls, int c, const char *msg) {
  if (c)
    return;  /* condition holds; nothing to report */
  if (ls->current != EOZ)
    save_and_next(ls);  /* add current to buffer for error message */
  lexerror(ls, msg, TK_STRING);
}
3268e3e3a7aSWarner Losh 
3278e3e3a7aSWarner Losh 
/*
** Save the current character, advance, and return the value of the
** new current character, which must be a hexadecimal digit (otherwise
** an error is raised). That digit itself is not consumed.
*/
static int gethexa (LexState *ls) {
  int ishexa;
  save_and_next(ls);
  ishexa = lisxdigit(ls->current);
  esccheck(ls, ishexa, "hexadecimal digit expected");
  return luaO_hexavalue(ls->current);
}
3338e3e3a7aSWarner Losh 
3348e3e3a7aSWarner Losh 
/*
** Read the two hexadecimal digits of an escape '\xXX' and return the
** resulting value. The two characters saved while reading are removed
** from the buffer; the second digit is left as the current character.
*/
static int readhexaesc (LexState *ls) {
  int high = gethexa(ls);
  int low = gethexa(ls);
  luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
  return (high << 4) + low;
}
3418e3e3a7aSWarner Losh 
3428e3e3a7aSWarner Losh 
/*
** Read an escape '\u{XXX}' (the current character is the 'u') and
** return its numeric value. At least one hexadecimal digit is
** required, and the value may not exceed 0x7FFFFFFF. All characters
** saved while reading are removed from the buffer before returning.
*/
static unsigned long readutf8esc (LexState *ls) {
  unsigned long code;
  int nsaved = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
  save_and_next(ls);  /* skip 'u' */
  esccheck(ls, ls->current == '{', "missing '{'");
  code = gethexa(ls);  /* must have at least one digit */
  for (;;) {
    save_and_next(ls);  /* save last digit read; look at the next char */
    if (!lisxdigit(ls->current))
      break;
    nsaved++;
    esccheck(ls, code <= (0x7FFFFFFFu >> 4), "UTF-8 value too large");
    code = (code << 4) + luaO_hexavalue(ls->current);
  }
  esccheck(ls, ls->current == '}', "missing '}'");
  next(ls);  /* skip '}' */
  luaZ_buffremove(ls->buff, nsaved);  /* remove saved chars from buffer */
  return code;
}
3598e3e3a7aSWarner Losh 
3608e3e3a7aSWarner Losh 
/*
** Handle an escape '\u{XXX}': read its value and append the bytes
** produced by 'luaO_utf8esc' to the token buffer. Those bytes are the
** last 'len' bytes of the local array.
*/
static void utf8esc (LexState *ls) {
  char encoded[UTF8BUFFSZ];
  int len = luaO_utf8esc(encoded, readutf8esc(ls));
  int pos;
  for (pos = UTF8BUFFSZ - len; pos < UTF8BUFFSZ; pos++)
    save(ls, encoded[pos]);  /* add encoded bytes to string */
}
3678e3e3a7aSWarner Losh 
3688e3e3a7aSWarner Losh 
/*
** Read a decimal escape '\ddd' (up to three digits) and return its
** value, which may not exceed UCHAR_MAX. The digits saved while reading
** are removed from the buffer.
*/
static int readdecesc (LexState *ls) {
  int ndigits = 0;
  int value = 0;  /* result accumulator */
  while (ndigits < 3 && lisdigit(ls->current)) {  /* read up to 3 digits */
    value = 10*value + ls->current - '0';
    save_and_next(ls);
    ndigits++;
  }
  esccheck(ls, value <= UCHAR_MAX, "decimal escape too large");
  luaZ_buffremove(ls->buff, ndigits);  /* remove read digits from buffer */
  return value;
}
3808e3e3a7aSWarner Losh 
3818e3e3a7aSWarner Losh 
/*
** Read a short literal string delimited by 'del' (the current
** character) and store the resulting string, without the delimiters,
** in 'seminfo->ts'. Escape sequences are translated as they are read.
** A line break or the end of the stream inside the string is an error.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep delimiter (for error messages) */
  while (ls->current != del) {
    int ch = ls->current;
    if (ch == EOZ)
      lexerror(ls, "unfinished string", TK_EOS);
    else if (ch == '\n' || ch == '\r')
      lexerror(ls, "unfinished string", TK_STRING);
    else if (ch != '\\')
      save_and_next(ls);  /* ordinary character */
    else {  /* escape sequences */
      int c = 0;  /* final character to be saved */
      int consume = 1;  /* skip the current char before saving 'c'? */
      int store = 1;  /* replace the saved '\\' by 'c'? */
      save_and_next(ls);  /* keep '\\' for error messages */
      switch (ls->current) {
        case 'a': c = '\a'; break;
        case 'b': c = '\b'; break;
        case 'f': c = '\f'; break;
        case 'n': c = '\n'; break;
        case 'r': c = '\r'; break;
        case 't': c = '\t'; break;
        case 'v': c = '\v'; break;
        case 'x': c = readhexaesc(ls); break;
        case 'u': utf8esc(ls); store = 0; break;
        case '\n': case '\r':
          inclinenumber(ls); c = '\n'; consume = 0; break;
        case '\\': case '\"': case '\'':
          c = ls->current; break;
        case EOZ: store = 0; break;  /* will raise an error next loop */
        case 'z': {  /* zap following span of spaces */
          luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
          next(ls);  /* skip the 'z' */
          while (lisspace(ls->current)) {
            if (currIsNewline(ls)) inclinenumber(ls);
            else next(ls);
          }
          store = 0;
          break;
        }
        default: {
          esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
          c = readdecesc(ls);  /* digital escape '\ddd' */
          consume = 0;  /* digits were already consumed */
          break;
        }
      }
      if (store) {
        if (consume)
          next(ls);
        luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
        save(ls, c);
      }
    }
  }
  save_and_next(ls);  /* skip delimiter */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
4438e3e3a7aSWarner Losh 
4448e3e3a7aSWarner Losh 
/*
** Main scanner: skip white space and comments, read the next token
** from the input, and return its code. Single-character tokens are
** returned as the character itself. For names, strings, and numerals
** the associated value is stored in 'seminfo'. Returns TK_EOS at the
** end of the stream.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  for (;;) {
    switch (ls->current) {
      case '\n': case '\r':  /* line breaks */
        inclinenumber(ls);
        break;
      case ' ': case '\f': case '\t': case '\v':  /* spaces */
        next(ls);
        break;
      case '-': {  /* '-' or '--' (comment) */
        size_t sep = 0;
        next(ls);
        if (!check_next1(ls, '-')) return '-';
        /* else is a comment */
        if (ls->current == '[') {  /* long comment? */
          sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
        }
        if (sep >= 2) {
          read_long_string(ls, NULL, sep);  /* skip long comment */
          luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
        }
        else {  /* short comment */
          while (ls->current != EOZ && !currIsNewline(ls))
            next(ls);  /* skip until end of line (or end of file) */
        }
        break;
      }
      case '[': {  /* long string or simply '[' */
        size_t sep = skip_sep(ls);
        if (sep < 2) {
          if (sep == 0)  /* '[=...' missing second bracket? */
            lexerror(ls, "invalid long string delimiter", TK_STRING);
          return '[';
        }
        read_long_string(ls, seminfo, sep);
        return TK_STRING;
      }
      case '=':
        next(ls);
        return check_next1(ls, '=') ? TK_EQ : '=';  /* '==' */
      case '<':
        next(ls);
        if (check_next1(ls, '=')) return TK_LE;  /* '<=' */
        if (check_next1(ls, '<')) return TK_SHL;  /* '<<' */
        return '<';
      case '>':
        next(ls);
        if (check_next1(ls, '=')) return TK_GE;  /* '>=' */
        if (check_next1(ls, '>')) return TK_SHR;  /* '>>' */
        return '>';
      case '/':
        next(ls);
        return check_next1(ls, '/') ? TK_IDIV : '/';  /* '//' */
      case '~':
        next(ls);
        return check_next1(ls, '=') ? TK_NE : '~';  /* '~=' */
      case ':':
        next(ls);
        return check_next1(ls, ':') ? TK_DBCOLON : ':';  /* '::' */
      case '"': case '\'':  /* short literal strings */
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      case '.':  /* '.', '..', '...', or number */
        save_and_next(ls);
        if (check_next1(ls, '.'))
          return check_next1(ls, '.') ? TK_DOTS : TK_CONCAT;
        if (lisdigit(ls->current))
          return read_numeral(ls, seminfo);
        return '.';
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        return read_numeral(ls, seminfo);
      case EOZ:
        return TK_EOS;
      default: {
        TString *word;
        if (!lislalpha(ls->current)) {
          /* single-char tokens ('+', '*', '%', '{', '}', ...) */
          int single = ls->current;
          next(ls);
          return single;
        }
        /* identifier or reserved word */
        do {
          save_and_next(ls);
        } while (lislalnum(ls->current));
        word = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
        seminfo->ts = word;
        return isreserved(word) ? word->extra - 1 + FIRST_RESERVED
                                : TK_NAME;
      }
    }
  }
}
5638e3e3a7aSWarner Losh 
5648e3e3a7aSWarner Losh 
/*
** Advance to the next token: record the current line in 'lastline',
** then make 'ls->t' the pending look-ahead token, if there is one, or
** else a token freshly read from the input.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token == TK_EOS) {  /* no look-ahead token? */
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    return;
  }
  ls->t = ls->lookahead;  /* use the look-ahead token */
  ls->lookahead.token = TK_EOS;  /* and discharge it */
}
5748e3e3a7aSWarner Losh 
5758e3e3a7aSWarner Losh 
/*
** Read one token ahead into 'ls->lookahead', without changing the
** current token, and return its code. There must be no pending
** look-ahead token.
*/
int luaX_lookahead (LexState *ls) {
  int ahead;
  lua_assert(ls->lookahead.token == TK_EOS);
  ahead = llex(ls, &ls->lookahead.seminfo);
  ls->lookahead.token = ahead;
  return ahead;
}
5818e3e3a7aSWarner Losh 
582