18e3e3a7aSWarner Losh /*
20495ed39SKyle Evans ** $Id: llex.c $
38e3e3a7aSWarner Losh ** Lexical Analyzer
48e3e3a7aSWarner Losh ** See Copyright Notice in lua.h
58e3e3a7aSWarner Losh */
68e3e3a7aSWarner Losh
78e3e3a7aSWarner Losh #define llex_c
88e3e3a7aSWarner Losh #define LUA_CORE
98e3e3a7aSWarner Losh
108e3e3a7aSWarner Losh #include "lprefix.h"
118e3e3a7aSWarner Losh
128e3e3a7aSWarner Losh
138e3e3a7aSWarner Losh #include <locale.h>
148e3e3a7aSWarner Losh #include <string.h>
158e3e3a7aSWarner Losh
168e3e3a7aSWarner Losh #include "lua.h"
178e3e3a7aSWarner Losh
188e3e3a7aSWarner Losh #include "lctype.h"
198e3e3a7aSWarner Losh #include "ldebug.h"
208e3e3a7aSWarner Losh #include "ldo.h"
218e3e3a7aSWarner Losh #include "lgc.h"
228e3e3a7aSWarner Losh #include "llex.h"
238e3e3a7aSWarner Losh #include "lobject.h"
248e3e3a7aSWarner Losh #include "lparser.h"
258e3e3a7aSWarner Losh #include "lstate.h"
268e3e3a7aSWarner Losh #include "lstring.h"
278e3e3a7aSWarner Losh #include "ltable.h"
288e3e3a7aSWarner Losh #include "lzio.h"
298e3e3a7aSWarner Losh
308e3e3a7aSWarner Losh
318e3e3a7aSWarner Losh
/*
** Advance the scanner: store the value produced by 'zgetc' on the
** input stream 'z' into 'current'. The macro argument is fully
** parenthesized so that any pointer expression can be passed.
*/
#define next(ls)	((ls)->current = zgetc((ls)->z))



/* true when the current character is '\n' or '\r' */
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
378e3e3a7aSWarner Losh
388e3e3a7aSWarner Losh
/*
** Textual form of every multi-character token; indexed by
** (token - FIRST_RESERVED). The order of the entries is significant.
*/
/* ORDER RESERVED */
static const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do", "else", "elseif", "end",
    "false", "for", "function", "goto", "if", "in",
    "local", "nil", "not", "or", "repeat", "return",
    "then", "true", "until", "while",
    /* other terminal symbols */
    "//", "..", "...",
    "==", ">=", "<=", "~=",
    "<<", ">>", "::",
    "<eof>",
    /* tokens that carry a semantic value */
    "<number>", "<integer>", "<name>", "<string>"
};
498e3e3a7aSWarner Losh
508e3e3a7aSWarner Losh
518e3e3a7aSWarner Losh #define save_and_next(ls) (save(ls, ls->current), next(ls))
528e3e3a7aSWarner Losh
538e3e3a7aSWarner Losh
548e3e3a7aSWarner Losh static l_noret lexerror (LexState *ls, const char *msg, int token);
558e3e3a7aSWarner Losh
568e3e3a7aSWarner Losh
/*
** Append character 'c' to the lexer's token buffer. When the buffer is
** full its capacity is doubled first; if it already holds MAX_SIZE/2
** bytes or more, a "lexical element too long" error is raised instead
** (with no "near <token>" suffix, since the token argument is 0).
*/
static void save (LexState *ls, int c) {
  Mbuffer *buf = ls->buff;
  if (luaZ_bufflen(buf) + 1 > luaZ_sizebuffer(buf)) {  /* no room left? */
    size_t cursize = luaZ_sizebuffer(buf);
    size_t newsize;
    if (cursize >= MAX_SIZE/2)
      lexerror(ls, "lexical element too long", 0);
    newsize = cursize * 2;
    luaZ_resizebuffer(ls->L, buf, newsize);
  }
  buf->buffer[luaZ_bufflen(buf)++] = cast_char(c);
}
688e3e3a7aSWarner Losh
698e3e3a7aSWarner Losh
/*
** Create the string for the environment name (LUA_ENV) and one string
** for each of the first NUM_RESERVED entries of 'luaX_tokens', handing
** every one of them to 'luaC_fix' so that it is never collected. Each
** reserved word records its 1-based table position in its 'extra'
** field.
*/
void luaX_init (lua_State *L) {
  int idx = 0;
  TString *envname = luaS_newliteral(L, LUA_ENV);  /* create env name */
  luaC_fix(L, obj2gco(envname));  /* never collect this name */
  while (idx < NUM_RESERVED) {
    TString *word = luaS_new(L, luaX_tokens[idx]);
    luaC_fix(L, obj2gco(word));  /* reserved words are never collected */
    word->extra = cast_byte(idx + 1);  /* mark as reserved word */
    idx++;
  }
}
808e3e3a7aSWarner Losh
818e3e3a7aSWarner Losh
/*
** Return a printable description of 'token'.
**  - tokens below FIRST_RESERVED are single-byte symbols: printable
**    ones are shown quoted, the others as a quoted numeric code;
**  - tokens below TK_EOS have a fixed spelling, returned quoted;
**  - the remaining tokens return their 'luaX_tokens' entry unquoted.
** Formatted results are built with 'luaO_pushfstring'.
*/
const char *luaX_token2str (LexState *ls, int token) {
  const char *name;
  if (token < FIRST_RESERVED) {  /* single-byte symbol? */
    /* control characters are shown by code instead of literally */
    const char *fmt = lisprint(token) ? "'%c'" : "'<\\%d>'";
    return luaO_pushfstring(ls->L, fmt, token);
  }
  name = luaX_tokens[token - FIRST_RESERVED];
  if (token >= TK_EOS)  /* names, strings, and numerals */
    return name;
  /* fixed format (symbols and reserved words) */
  return luaO_pushfstring(ls->L, "'%s'", name);
}
978e3e3a7aSWarner Losh
988e3e3a7aSWarner Losh
/*
** Text used to describe 'token' in an error message. For names,
** strings, and numerals this is the (quoted) content of the token
** buffer, which is zero-terminated here first; every other token is
** described by 'luaX_token2str'.
*/
static const char *txtToken (LexState *ls, int token) {
  if (token == TK_NAME || token == TK_STRING ||
      token == TK_FLT || token == TK_INT) {
    save(ls, '\0');  /* make the buffer a proper C string */
    return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
  }
  return luaX_token2str(ls, token);
}
1098e3e3a7aSWarner Losh
1108e3e3a7aSWarner Losh
lexerror(LexState * ls,const char * msg,int token)1118e3e3a7aSWarner Losh static l_noret lexerror (LexState *ls, const char *msg, int token) {
1128e3e3a7aSWarner Losh msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
1138e3e3a7aSWarner Losh if (token)
1148e3e3a7aSWarner Losh luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
1158e3e3a7aSWarner Losh luaD_throw(ls->L, LUA_ERRSYNTAX);
1168e3e3a7aSWarner Losh }
1178e3e3a7aSWarner Losh
1188e3e3a7aSWarner Losh
luaX_syntaxerror(LexState * ls,const char * msg)1198e3e3a7aSWarner Losh l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
1208e3e3a7aSWarner Losh lexerror(ls, msg, ls->t.token);
1218e3e3a7aSWarner Losh }
1228e3e3a7aSWarner Losh
1238e3e3a7aSWarner Losh
1248e3e3a7aSWarner Losh /*
1258c784bb8SWarner Losh ** Creates a new string and anchors it in scanner's table so that it
1268c784bb8SWarner Losh ** will not be collected until the end of the compilation; by that time
1278c784bb8SWarner Losh ** it should be anchored somewhere. It also internalizes long strings,
1288c784bb8SWarner Losh ** ensuring there is only one copy of each unique string. The table
1298c784bb8SWarner Losh ** here is used as a set: the string enters as the key, while its value
1308c784bb8SWarner Losh ** is irrelevant. We use the string itself as the value only because it
131*a9490b81SWarner Losh ** is a TValue readily available. Later, the code generation can change
1328c784bb8SWarner Losh ** this value.
1338e3e3a7aSWarner Losh */
luaX_newstring(LexState * ls,const char * str,size_t l)1348e3e3a7aSWarner Losh TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
1358e3e3a7aSWarner Losh lua_State *L = ls->L;
1368e3e3a7aSWarner Losh TString *ts = luaS_newlstr(L, str, l); /* create new string */
1378c784bb8SWarner Losh const TValue *o = luaH_getstr(ls->h, ts);
1388c784bb8SWarner Losh if (!ttisnil(o)) /* string already present? */
1398c784bb8SWarner Losh ts = keystrval(nodefromval(o)); /* get saved copy */
1408c784bb8SWarner Losh else { /* not in use yet */
141*a9490b81SWarner Losh TValue *stv = s2v(L->top.p++); /* reserve stack space for string */
1428c784bb8SWarner Losh setsvalue(L, stv, ts); /* temporarily anchor the string */
1438c784bb8SWarner Losh luaH_finishset(L, ls->h, stv, o, stv); /* t[string] = string */
1448c784bb8SWarner Losh /* table is not a metatable, so it does not need to invalidate cache */
1458e3e3a7aSWarner Losh luaC_checkGC(L);
146*a9490b81SWarner Losh L->top.p--; /* remove string from stack */
1478c784bb8SWarner Losh }
1488e3e3a7aSWarner Losh return ts;
1498e3e3a7aSWarner Losh }
1508e3e3a7aSWarner Losh
1518e3e3a7aSWarner Losh
1528e3e3a7aSWarner Losh /*
1538e3e3a7aSWarner Losh ** increment line number and skips newline sequence (any of
1548e3e3a7aSWarner Losh ** \n, \r, \n\r, or \r\n)
1558e3e3a7aSWarner Losh */
inclinenumber(LexState * ls)1568e3e3a7aSWarner Losh static void inclinenumber (LexState *ls) {
1578e3e3a7aSWarner Losh int old = ls->current;
1588e3e3a7aSWarner Losh lua_assert(currIsNewline(ls));
1598e3e3a7aSWarner Losh next(ls); /* skip '\n' or '\r' */
1608e3e3a7aSWarner Losh if (currIsNewline(ls) && ls->current != old)
1618e3e3a7aSWarner Losh next(ls); /* skip '\n\r' or '\r\n' */
1628e3e3a7aSWarner Losh if (++ls->linenumber >= MAX_INT)
1638e3e3a7aSWarner Losh lexerror(ls, "chunk has too many lines", 0);
1648e3e3a7aSWarner Losh }
1658e3e3a7aSWarner Losh
1668e3e3a7aSWarner Losh
/*
** Prepare lexer state 'ls' to scan stream 'z' on behalf of state 'L'.
** 'source' is the chunk name used in messages and 'firstchar' is the
** first character of the input, already read by the caller. Line
** counters start at 1, there is no look-ahead token, and the token
** buffer is sized to LUA_MINBUFFER.
*/
void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
                    int firstchar) {
  /* where the input comes from */
  ls->L = L;
  ls->z = z;
  ls->source = source;
  ls->current = firstchar;
  /* token state */
  ls->t.token = 0;
  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  /* no function state yet; position at the first line */
  ls->fs = NULL;
  ls->linenumber = 1;
  ls->lastline = 1;
  ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
}
1818e3e3a7aSWarner Losh
1828e3e3a7aSWarner Losh
1838e3e3a7aSWarner Losh
1848e3e3a7aSWarner Losh /*
1858e3e3a7aSWarner Losh ** =======================================================
1868e3e3a7aSWarner Losh ** LEXICAL ANALYZER
1878e3e3a7aSWarner Losh ** =======================================================
1888e3e3a7aSWarner Losh */
1898e3e3a7aSWarner Losh
1908e3e3a7aSWarner Losh
/*
** If the current character is 'c', skip it (without saving it) and
** return 1; otherwise leave the input untouched and return 0.
*/
static int check_next1 (LexState *ls, int c) {
  if (ls->current != c)
    return 0;
  next(ls);
  return 1;
}
1988e3e3a7aSWarner Losh
1998e3e3a7aSWarner Losh
2008e3e3a7aSWarner Losh /*
2018e3e3a7aSWarner Losh ** Check whether current char is in set 'set' (with two chars) and
2028e3e3a7aSWarner Losh ** saves it
2038e3e3a7aSWarner Losh */
check_next2(LexState * ls,const char * set)2048e3e3a7aSWarner Losh static int check_next2 (LexState *ls, const char *set) {
2058e3e3a7aSWarner Losh lua_assert(set[2] == '\0');
2068e3e3a7aSWarner Losh if (ls->current == set[0] || ls->current == set[1]) {
2078e3e3a7aSWarner Losh save_and_next(ls);
2088e3e3a7aSWarner Losh return 1;
2098e3e3a7aSWarner Losh }
2108e3e3a7aSWarner Losh else return 0;
2118e3e3a7aSWarner Losh }
2128e3e3a7aSWarner Losh
2138e3e3a7aSWarner Losh
2148e3e3a7aSWarner Losh /* LUA_NUMBER */
2158e3e3a7aSWarner Losh /*
2160495ed39SKyle Evans ** This function is quite liberal in what it accepts, as 'luaO_str2num'
2170495ed39SKyle Evans ** will reject ill-formed numerals. Roughly, it accepts the following
2180495ed39SKyle Evans ** pattern:
2190495ed39SKyle Evans **
2200495ed39SKyle Evans ** %d(%x|%.|([Ee][+-]?))* | 0[Xx](%x|%.|([Pp][+-]?))*
2210495ed39SKyle Evans **
2220495ed39SKyle Evans ** The only tricky part is to accept [+-] only after a valid exponent
2230495ed39SKyle Evans ** mark, to avoid reading '3-4' or '0xe+1' as a single number.
2240495ed39SKyle Evans **
2250495ed39SKyle Evans ** The caller might have already read an initial dot.
2268e3e3a7aSWarner Losh */
read_numeral(LexState * ls,SemInfo * seminfo)2278e3e3a7aSWarner Losh static int read_numeral (LexState *ls, SemInfo *seminfo) {
2288e3e3a7aSWarner Losh TValue obj;
2298e3e3a7aSWarner Losh const char *expo = "Ee";
2308e3e3a7aSWarner Losh int first = ls->current;
2318e3e3a7aSWarner Losh lua_assert(lisdigit(ls->current));
2328e3e3a7aSWarner Losh save_and_next(ls);
2338e3e3a7aSWarner Losh if (first == '0' && check_next2(ls, "xX")) /* hexadecimal? */
2348e3e3a7aSWarner Losh expo = "Pp";
2358e3e3a7aSWarner Losh for (;;) {
2360495ed39SKyle Evans if (check_next2(ls, expo)) /* exponent mark? */
2378e3e3a7aSWarner Losh check_next2(ls, "-+"); /* optional exponent sign */
2380495ed39SKyle Evans else if (lisxdigit(ls->current) || ls->current == '.') /* '%x|%.' */
2398e3e3a7aSWarner Losh save_and_next(ls);
2408e3e3a7aSWarner Losh else break;
2418e3e3a7aSWarner Losh }
2420495ed39SKyle Evans if (lislalpha(ls->current)) /* is numeral touching a letter? */
2430495ed39SKyle Evans save_and_next(ls); /* force an error */
2448e3e3a7aSWarner Losh save(ls, '\0');
2458e3e3a7aSWarner Losh if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0) /* format error? */
2468e3e3a7aSWarner Losh lexerror(ls, "malformed number", TK_FLT);
2478e3e3a7aSWarner Losh if (ttisinteger(&obj)) {
2488e3e3a7aSWarner Losh seminfo->i = ivalue(&obj);
2498e3e3a7aSWarner Losh return TK_INT;
2508e3e3a7aSWarner Losh }
2518e3e3a7aSWarner Losh else {
2528e3e3a7aSWarner Losh lua_assert(ttisfloat(&obj));
2538e3e3a7aSWarner Losh seminfo->r = fltvalue(&obj);
2548e3e3a7aSWarner Losh return TK_FLT;
2558e3e3a7aSWarner Losh }
2568e3e3a7aSWarner Losh }
2578e3e3a7aSWarner Losh
2588e3e3a7aSWarner Losh
2598e3e3a7aSWarner Losh /*
2600495ed39SKyle Evans ** read a sequence '[=*[' or ']=*]', leaving the last bracket. If
2610495ed39SKyle Evans ** sequence is well formed, return its number of '='s + 2; otherwise,
2620495ed39SKyle Evans ** return 1 if it is a single bracket (no '='s and no 2nd bracket);
2630495ed39SKyle Evans ** otherwise (an unfinished '[==...') return 0.
2648e3e3a7aSWarner Losh */
skip_sep(LexState * ls)265bf9580a1SKyle Evans static size_t skip_sep (LexState *ls) {
266bf9580a1SKyle Evans size_t count = 0;
2678e3e3a7aSWarner Losh int s = ls->current;
2688e3e3a7aSWarner Losh lua_assert(s == '[' || s == ']');
2698e3e3a7aSWarner Losh save_and_next(ls);
2708e3e3a7aSWarner Losh while (ls->current == '=') {
2718e3e3a7aSWarner Losh save_and_next(ls);
2728e3e3a7aSWarner Losh count++;
2738e3e3a7aSWarner Losh }
274bf9580a1SKyle Evans return (ls->current == s) ? count + 2
275bf9580a1SKyle Evans : (count == 0) ? 1
276bf9580a1SKyle Evans : 0;
2778e3e3a7aSWarner Losh }
2788e3e3a7aSWarner Losh
2798e3e3a7aSWarner Losh
/*
** Read the body of a long string or long comment whose opening
** delimiter was already scanned by 'skip_sep' ('sep' is the value it
** returned). A NULL 'seminfo' means a comment: its text is not kept.
** Otherwise the resulting string, stripped of 'sep' characters at each
** end of the buffer, is stored in 'seminfo->ts'. Reaching the end of
** the input before the closing delimiter is an error.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) {
  int startline = ls->linenumber;  /* initial line (for error message) */
  save_and_next(ls);  /* skip 2nd '[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  for (;;) {
    int c = ls->current;
    if (c == EOZ) {  /* error */
      const char *what = (seminfo ? "string" : "comment");
      const char *msg = luaO_pushfstring(ls->L,
                   "unfinished long %s (starting at line %d)", what, startline);
      lexerror(ls, msg, TK_EOS);
    }
    else if (c == ']') {
      if (skip_sep(ls) == sep) {  /* matching closing delimiter? */
        save_and_next(ls);  /* skip 2nd ']' */
        break;
      }
    }
    else if (c == '\n' || c == '\r') {
      save(ls, '\n');  /* any newline sequence is stored as '\n' */
      inclinenumber(ls);
      if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
    }
    else if (seminfo)
      save_and_next(ls);
    else
      next(ls);
  }
  if (seminfo)
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
                                     luaZ_bufflen(ls->buff) - 2 * sep);
}
3178e3e3a7aSWarner Losh
3188e3e3a7aSWarner Losh
/*
** Check condition 'c' for an escape sequence. When it is false, the
** current character (unless it is the end of the input) is added to
** the buffer so that it shows up in the message, and an error with
** message 'msg' is raised.
*/
static void esccheck (LexState *ls, int c, const char *msg) {
  if (c)
    return;  /* condition holds; nothing to do */
  if (ls->current != EOZ)
    save_and_next(ls);  /* add current to buffer for error message */
  lexerror(ls, msg, TK_STRING);
}
3268e3e3a7aSWarner Losh
3278e3e3a7aSWarner Losh
/*
** Save the current character, advance, and require the new current
** character to be a hexadecimal digit. Returns the value of that
** digit; the digit itself is not consumed.
*/
static int gethexa (LexState *ls) {
  int isxdig;
  save_and_next(ls);
  isxdig = lisxdigit(ls->current);
  esccheck (ls, isxdig, "hexadecimal digit expected");
  return luaO_hexavalue(ls->current);
}
3338e3e3a7aSWarner Losh
3348e3e3a7aSWarner Losh
/*
** Read the two hexadecimal digits of a '\xXX' escape and return the
** byte value they denote. The two characters saved in the buffer while
** reading are removed again before returning.
*/
static int readhexaesc (LexState *ls) {
  int high = gethexa(ls);
  int low = gethexa(ls);
  luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
  return (high << 4) + low;
}
3418e3e3a7aSWarner Losh
3428e3e3a7aSWarner Losh
/*
** Read a '\u{XXX}' escape and return the code point it denotes. At
** least one hexadecimal digit is required, the value may not exceed
** 0x7FFFFFFF, and both braces must be present. Every character saved
** in the buffer while reading is removed again before returning.
*/
static unsigned long readutf8esc (LexState *ls) {
  unsigned long code;
  int nsaved = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
  save_and_next(ls);  /* skip 'u' */
  esccheck(ls, ls->current == '{', "missing '{'");
  code = gethexa(ls);  /* must have at least one digit */
  for (;;) {
    save_and_next(ls);  /* save previous digit; look at next char */
    if (!lisxdigit(ls->current))
      break;
    nsaved++;
    esccheck(ls, code <= (0x7FFFFFFFu >> 4), "UTF-8 value too large");
    code = (code << 4) + luaO_hexavalue(ls->current);
  }
  esccheck(ls, ls->current == '}', "missing '}'");
  next(ls);  /* skip '}' */
  luaZ_buffremove(ls->buff, nsaved);  /* remove saved chars from buffer */
  return code;
}
3598e3e3a7aSWarner Losh
3608e3e3a7aSWarner Losh
/*
** Handle a '\u{XXX}' escape: read the code point, encode it with
** 'luaO_utf8esc', and append to the token buffer the last 'n' bytes of
** the scratch array, 'n' being the count returned by that function.
*/
static void utf8esc (LexState *ls) {
  char buff[UTF8BUFFSZ];
  int n = luaO_utf8esc(buff, readutf8esc(ls));
  int pos;
  for (pos = UTF8BUFFSZ - n; pos < UTF8BUFFSZ; pos++)
    save(ls, buff[pos]);  /* add 'buff' to string */
}
3678e3e3a7aSWarner Losh
3688e3e3a7aSWarner Losh
/*
** Read a decimal escape '\ddd' (one to three digits, starting at the
** current character) and return its value, which must not exceed
** UCHAR_MAX. The digits saved in the buffer while reading are removed
** again before returning.
*/
static int readdecesc (LexState *ls) {
  int ndigits = 0;
  int value = 0;  /* result accumulator */
  while (ndigits < 3 && lisdigit(ls->current)) {  /* read up to 3 digits */
    value = 10*value + ls->current - '0';
    save_and_next(ls);
    ndigits++;
  }
  esccheck(ls, value <= UCHAR_MAX, "decimal escape too large");
  luaZ_buffremove(ls->buff, ndigits);  /* remove read digits from buffer */
  return value;
}
3808e3e3a7aSWarner Losh
3818e3e3a7aSWarner Losh
/*
** Read a short literal string delimited by 'del' (the current
** character on entry), translating escape sequences, and store the
** result in 'seminfo->ts'. Both delimiters are kept in the buffer
** while scanning (for error messages) and dropped from the final
** string. An unescaped newline or the end of the input inside the
** string is an error.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  /* what to do once an escape sequence has been recognized */
  enum {
    ESC_READ_SAVE,  /* skip current char, then replace '\\' by 'c' */
    ESC_ONLY_SAVE,  /* replace '\\' by 'c' (input already consumed) */
    ESC_NO_SAVE     /* nothing else to do */
  };
  save_and_next(ls);  /* keep delimiter (for error messages) */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        lexerror(ls, "unfinished string", TK_EOS);
        break;  /* to avoid warnings */
      case '\n':
      case '\r':
        lexerror(ls, "unfinished string", TK_STRING);
        break;  /* to avoid warnings */
      case '\\': {  /* escape sequences */
        int c = 0;  /* final character to be saved */
        int action = ESC_READ_SAVE;
        save_and_next(ls);  /* keep '\\' for error messages */
        switch (ls->current) {
          case 'a': c = '\a'; break;
          case 'b': c = '\b'; break;
          case 'f': c = '\f'; break;
          case 'n': c = '\n'; break;
          case 'r': c = '\r'; break;
          case 't': c = '\t'; break;
          case 'v': c = '\v'; break;
          case 'x': c = readhexaesc(ls); break;
          case 'u': utf8esc(ls); action = ESC_NO_SAVE; break;
          case '\n': case '\r':
            inclinenumber(ls); c = '\n'; action = ESC_ONLY_SAVE; break;
          case '\\': case '\"': case '\'':
            c = ls->current; break;
          case EOZ:  /* will raise an error next loop */
            action = ESC_NO_SAVE; break;
          case 'z': {  /* zap following span of spaces */
            luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
            next(ls);  /* skip the 'z' */
            while (lisspace(ls->current)) {
              if (currIsNewline(ls)) inclinenumber(ls);
              else next(ls);
            }
            action = ESC_NO_SAVE;
            break;
          }
          default: {
            esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
            c = readdecesc(ls);  /* digital escape '\ddd' */
            action = ESC_ONLY_SAVE;
            break;
          }
        }
        if (action == ESC_READ_SAVE)
          next(ls);  /* skip the character that named the escape */
        if (action != ESC_NO_SAVE) {
          luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
          save(ls, c);
        }
        break;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
4438e3e3a7aSWarner Losh
4448e3e3a7aSWarner Losh
/*
** Main scanner function. Starting at the current character, skip
** white space and comments and return the next token. Tokens that
** carry a value (strings, names, numerals) store it through 'seminfo'.
** The token buffer is reset on entry.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  for (;;) {
    switch (ls->current) {
      case '\n': case '\r': {  /* line breaks */
        inclinenumber(ls);
        break;
      }
      case ' ': case '\f': case '\t': case '\v': {  /* spaces */
        next(ls);
        break;
      }
      case '-': {  /* '-' or '--' (comment) */
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {  /* long comment? */
          size_t level = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
          if (level >= 2) {
            read_long_string(ls, NULL, level);  /* skip long comment */
            luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
            break;
          }
        }
        /* else short comment: skip until end of line (or end of file) */
        while (ls->current != EOZ && !currIsNewline(ls))
          next(ls);
        break;
      }
      case '[': {  /* long string or simply '[' */
        size_t level = skip_sep(ls);
        if (level >= 2) {
          read_long_string(ls, seminfo, level);
          return TK_STRING;
        }
        if (level == 0)  /* '[=...' missing second bracket? */
          lexerror(ls, "invalid long string delimiter", TK_STRING);
        return '[';
      }
      case '=': {
        next(ls);
        if (check_next1(ls, '=')) return TK_EQ;  /* '==' */
        return '=';
      }
      case '<': {
        next(ls);
        if (check_next1(ls, '=')) return TK_LE;  /* '<=' */
        if (check_next1(ls, '<')) return TK_SHL;  /* '<<' */
        return '<';
      }
      case '>': {
        next(ls);
        if (check_next1(ls, '=')) return TK_GE;  /* '>=' */
        if (check_next1(ls, '>')) return TK_SHR;  /* '>>' */
        return '>';
      }
      case '/': {
        next(ls);
        if (check_next1(ls, '/')) return TK_IDIV;  /* '//' */
        return '/';
      }
      case '~': {
        next(ls);
        if (check_next1(ls, '=')) return TK_NE;  /* '~=' */
        return '~';
      }
      case ':': {
        next(ls);
        if (check_next1(ls, ':')) return TK_DBCOLON;  /* '::' */
        return ':';
      }
      case '"': case '\'': {  /* short literal strings */
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {  /* '.', '..', '...', or number */
        save_and_next(ls);
        if (check_next1(ls, '.')) {
          if (check_next1(ls, '.')) return TK_DOTS;  /* '...' */
          return TK_CONCAT;  /* '..' */
        }
        if (lisdigit(ls->current))  /* numeral starting with a dot */
          return read_numeral(ls, seminfo);
        return '.';
      }
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9': {
        return read_numeral(ls, seminfo);
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        if (!lislalpha(ls->current)) {
          /* single-char tokens ('+', '*', '%', '{', '}', ...) */
          int single = ls->current;
          next(ls);
          return single;
        }
        else {  /* identifier or reserved word */
          TString *word;
          do {
            save_and_next(ls);
          } while (lislalnum(ls->current));
          word = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                    luaZ_bufflen(ls->buff));
          seminfo->ts = word;
          if (isreserved(word))  /* reserved word? */
            return word->extra - 1 + FIRST_RESERVED;
          return TK_NAME;
        }
      }
    }
  }
}
5638e3e3a7aSWarner Losh
5648e3e3a7aSWarner Losh
/*
** Advance to the next token. The line of the token being left is
** remembered in 'lastline'. A pending look-ahead token, if any, becomes
** the current token; otherwise a new token is scanned.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token == TK_EOS) {  /* no look-ahead token? */
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    return;
  }
  ls->t = ls->lookahead;  /* use the look-ahead token */
  ls->lookahead.token = TK_EOS;  /* and discharge it */
}
5748e3e3a7aSWarner Losh
5758e3e3a7aSWarner Losh
/*
** Scan one token ahead without making it current, store it as the
** look-ahead token, and return its code. There must be no pending
** look-ahead token when this is called.
*/
int luaX_lookahead (LexState *ls) {
  int tok;
  lua_assert(ls->lookahead.token == TK_EOS);
  tok = llex(ls, &ls->lookahead.seminfo);
  ls->lookahead.token = tok;
  return tok;
}
5818e3e3a7aSWarner Losh
582