xref: /freebsd/contrib/lua/src/llex.c (revision 8e3e3a7ae841ccf6f6ac30a2eeab85df5d7f04bc)
1*8e3e3a7aSWarner Losh /*
2*8e3e3a7aSWarner Losh ** $Id: llex.c,v 2.96 2016/05/02 14:02:12 roberto Exp $
3*8e3e3a7aSWarner Losh ** Lexical Analyzer
4*8e3e3a7aSWarner Losh ** See Copyright Notice in lua.h
5*8e3e3a7aSWarner Losh */
6*8e3e3a7aSWarner Losh 
7*8e3e3a7aSWarner Losh #define llex_c
8*8e3e3a7aSWarner Losh #define LUA_CORE
9*8e3e3a7aSWarner Losh 
10*8e3e3a7aSWarner Losh #include "lprefix.h"
11*8e3e3a7aSWarner Losh 
12*8e3e3a7aSWarner Losh 
13*8e3e3a7aSWarner Losh #include <locale.h>
14*8e3e3a7aSWarner Losh #include <string.h>
15*8e3e3a7aSWarner Losh 
16*8e3e3a7aSWarner Losh #include "lua.h"
17*8e3e3a7aSWarner Losh 
18*8e3e3a7aSWarner Losh #include "lctype.h"
19*8e3e3a7aSWarner Losh #include "ldebug.h"
20*8e3e3a7aSWarner Losh #include "ldo.h"
21*8e3e3a7aSWarner Losh #include "lgc.h"
22*8e3e3a7aSWarner Losh #include "llex.h"
23*8e3e3a7aSWarner Losh #include "lobject.h"
24*8e3e3a7aSWarner Losh #include "lparser.h"
25*8e3e3a7aSWarner Losh #include "lstate.h"
26*8e3e3a7aSWarner Losh #include "lstring.h"
27*8e3e3a7aSWarner Losh #include "ltable.h"
28*8e3e3a7aSWarner Losh #include "lzio.h"
29*8e3e3a7aSWarner Losh 
30*8e3e3a7aSWarner Losh 
31*8e3e3a7aSWarner Losh 
/* fetch one character from the stream 'ls->z' (via 'zgetc') into 'current' */
#define next(ls)	((ls)->current = zgetc((ls)->z))



/* is the current character one of the line-break characters? */
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
37*8e3e3a7aSWarner Losh 
38*8e3e3a7aSWarner Losh 
/*
** Text of every token that is not a single character. 'luaX_token2str'
** indexes this table with (token - FIRST_RESERVED) and 'luaX_init'
** registers its first NUM_RESERVED entries as reserved words, so the
** order of the entries must not change.
*/
/* ORDER RESERVED */
static const char *const luaX_tokens [] = {
    /* words */
    "and", "break", "do", "else", "elseif", "end", "false", "for",
    "function", "goto", "if", "in", "local", "nil", "not", "or",
    "repeat", "return", "then", "true", "until", "while",
    /* multi-character symbols */
    "//", "..", "...", "==", ">=", "<=", "~=", "<<", ">>", "::",
    /* end of stream and tokens without a fixed spelling */
    "<eof>", "<number>", "<integer>", "<name>", "<string>"
};
49*8e3e3a7aSWarner Losh 
50*8e3e3a7aSWarner Losh 
51*8e3e3a7aSWarner Losh #define save_and_next(ls) (save(ls, ls->current), next(ls))
52*8e3e3a7aSWarner Losh 
53*8e3e3a7aSWarner Losh 
54*8e3e3a7aSWarner Losh static l_noret lexerror (LexState *ls, const char *msg, int token);
55*8e3e3a7aSWarner Losh 
56*8e3e3a7aSWarner Losh 
57*8e3e3a7aSWarner Losh static void save (LexState *ls, int c) {
58*8e3e3a7aSWarner Losh   Mbuffer *b = ls->buff;
59*8e3e3a7aSWarner Losh   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
60*8e3e3a7aSWarner Losh     size_t newsize;
61*8e3e3a7aSWarner Losh     if (luaZ_sizebuffer(b) >= MAX_SIZE/2)
62*8e3e3a7aSWarner Losh       lexerror(ls, "lexical element too long", 0);
63*8e3e3a7aSWarner Losh     newsize = luaZ_sizebuffer(b) * 2;
64*8e3e3a7aSWarner Losh     luaZ_resizebuffer(ls->L, b, newsize);
65*8e3e3a7aSWarner Losh   }
66*8e3e3a7aSWarner Losh   b->buffer[luaZ_bufflen(b)++] = cast(char, c);
67*8e3e3a7aSWarner Losh }
68*8e3e3a7aSWarner Losh 
69*8e3e3a7aSWarner Losh 
70*8e3e3a7aSWarner Losh void luaX_init (lua_State *L) {
71*8e3e3a7aSWarner Losh   int i;
72*8e3e3a7aSWarner Losh   TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
73*8e3e3a7aSWarner Losh   luaC_fix(L, obj2gco(e));  /* never collect this name */
74*8e3e3a7aSWarner Losh   for (i=0; i<NUM_RESERVED; i++) {
75*8e3e3a7aSWarner Losh     TString *ts = luaS_new(L, luaX_tokens[i]);
76*8e3e3a7aSWarner Losh     luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
77*8e3e3a7aSWarner Losh     ts->extra = cast_byte(i+1);  /* reserved word */
78*8e3e3a7aSWarner Losh   }
79*8e3e3a7aSWarner Losh }
80*8e3e3a7aSWarner Losh 
81*8e3e3a7aSWarner Losh 
82*8e3e3a7aSWarner Losh const char *luaX_token2str (LexState *ls, int token) {
83*8e3e3a7aSWarner Losh   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
84*8e3e3a7aSWarner Losh     lua_assert(token == cast_uchar(token));
85*8e3e3a7aSWarner Losh     return luaO_pushfstring(ls->L, "'%c'", token);
86*8e3e3a7aSWarner Losh   }
87*8e3e3a7aSWarner Losh   else {
88*8e3e3a7aSWarner Losh     const char *s = luaX_tokens[token - FIRST_RESERVED];
89*8e3e3a7aSWarner Losh     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
90*8e3e3a7aSWarner Losh       return luaO_pushfstring(ls->L, "'%s'", s);
91*8e3e3a7aSWarner Losh     else  /* names, strings, and numerals */
92*8e3e3a7aSWarner Losh       return s;
93*8e3e3a7aSWarner Losh   }
94*8e3e3a7aSWarner Losh }
95*8e3e3a7aSWarner Losh 
96*8e3e3a7aSWarner Losh 
97*8e3e3a7aSWarner Losh static const char *txtToken (LexState *ls, int token) {
98*8e3e3a7aSWarner Losh   switch (token) {
99*8e3e3a7aSWarner Losh     case TK_NAME: case TK_STRING:
100*8e3e3a7aSWarner Losh     case TK_FLT: case TK_INT:
101*8e3e3a7aSWarner Losh       save(ls, '\0');
102*8e3e3a7aSWarner Losh       return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
103*8e3e3a7aSWarner Losh     default:
104*8e3e3a7aSWarner Losh       return luaX_token2str(ls, token);
105*8e3e3a7aSWarner Losh   }
106*8e3e3a7aSWarner Losh }
107*8e3e3a7aSWarner Losh 
108*8e3e3a7aSWarner Losh 
109*8e3e3a7aSWarner Losh static l_noret lexerror (LexState *ls, const char *msg, int token) {
110*8e3e3a7aSWarner Losh   msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
111*8e3e3a7aSWarner Losh   if (token)
112*8e3e3a7aSWarner Losh     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
113*8e3e3a7aSWarner Losh   luaD_throw(ls->L, LUA_ERRSYNTAX);
114*8e3e3a7aSWarner Losh }
115*8e3e3a7aSWarner Losh 
116*8e3e3a7aSWarner Losh 
117*8e3e3a7aSWarner Losh l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
118*8e3e3a7aSWarner Losh   lexerror(ls, msg, ls->t.token);
119*8e3e3a7aSWarner Losh }
120*8e3e3a7aSWarner Losh 
121*8e3e3a7aSWarner Losh 
122*8e3e3a7aSWarner Losh /*
123*8e3e3a7aSWarner Losh ** creates a new string and anchors it in scanner's table so that
124*8e3e3a7aSWarner Losh ** it will not be collected until the end of the compilation
125*8e3e3a7aSWarner Losh ** (by that time it should be anchored somewhere)
126*8e3e3a7aSWarner Losh */
127*8e3e3a7aSWarner Losh TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
128*8e3e3a7aSWarner Losh   lua_State *L = ls->L;
129*8e3e3a7aSWarner Losh   TValue *o;  /* entry for 'str' */
130*8e3e3a7aSWarner Losh   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
131*8e3e3a7aSWarner Losh   setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
132*8e3e3a7aSWarner Losh   o = luaH_set(L, ls->h, L->top - 1);
133*8e3e3a7aSWarner Losh   if (ttisnil(o)) {  /* not in use yet? */
134*8e3e3a7aSWarner Losh     /* boolean value does not need GC barrier;
135*8e3e3a7aSWarner Losh        table has no metatable, so it does not need to invalidate cache */
136*8e3e3a7aSWarner Losh     setbvalue(o, 1);  /* t[string] = true */
137*8e3e3a7aSWarner Losh     luaC_checkGC(L);
138*8e3e3a7aSWarner Losh   }
139*8e3e3a7aSWarner Losh   else {  /* string already present */
140*8e3e3a7aSWarner Losh     ts = tsvalue(keyfromval(o));  /* re-use value previously stored */
141*8e3e3a7aSWarner Losh   }
142*8e3e3a7aSWarner Losh   L->top--;  /* remove string from stack */
143*8e3e3a7aSWarner Losh   return ts;
144*8e3e3a7aSWarner Losh }
145*8e3e3a7aSWarner Losh 
146*8e3e3a7aSWarner Losh 
147*8e3e3a7aSWarner Losh /*
148*8e3e3a7aSWarner Losh ** increment line number and skips newline sequence (any of
149*8e3e3a7aSWarner Losh ** \n, \r, \n\r, or \r\n)
150*8e3e3a7aSWarner Losh */
151*8e3e3a7aSWarner Losh static void inclinenumber (LexState *ls) {
152*8e3e3a7aSWarner Losh   int old = ls->current;
153*8e3e3a7aSWarner Losh   lua_assert(currIsNewline(ls));
154*8e3e3a7aSWarner Losh   next(ls);  /* skip '\n' or '\r' */
155*8e3e3a7aSWarner Losh   if (currIsNewline(ls) && ls->current != old)
156*8e3e3a7aSWarner Losh     next(ls);  /* skip '\n\r' or '\r\n' */
157*8e3e3a7aSWarner Losh   if (++ls->linenumber >= MAX_INT)
158*8e3e3a7aSWarner Losh     lexerror(ls, "chunk has too many lines", 0);
159*8e3e3a7aSWarner Losh }
160*8e3e3a7aSWarner Losh 
161*8e3e3a7aSWarner Losh 
162*8e3e3a7aSWarner Losh void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
163*8e3e3a7aSWarner Losh                     int firstchar) {
164*8e3e3a7aSWarner Losh   ls->t.token = 0;
165*8e3e3a7aSWarner Losh   ls->L = L;
166*8e3e3a7aSWarner Losh   ls->current = firstchar;
167*8e3e3a7aSWarner Losh   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
168*8e3e3a7aSWarner Losh   ls->z = z;
169*8e3e3a7aSWarner Losh   ls->fs = NULL;
170*8e3e3a7aSWarner Losh   ls->linenumber = 1;
171*8e3e3a7aSWarner Losh   ls->lastline = 1;
172*8e3e3a7aSWarner Losh   ls->source = source;
173*8e3e3a7aSWarner Losh   ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
174*8e3e3a7aSWarner Losh   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
175*8e3e3a7aSWarner Losh }
176*8e3e3a7aSWarner Losh 
177*8e3e3a7aSWarner Losh 
178*8e3e3a7aSWarner Losh 
179*8e3e3a7aSWarner Losh /*
180*8e3e3a7aSWarner Losh ** =======================================================
181*8e3e3a7aSWarner Losh ** LEXICAL ANALYZER
182*8e3e3a7aSWarner Losh ** =======================================================
183*8e3e3a7aSWarner Losh */
184*8e3e3a7aSWarner Losh 
185*8e3e3a7aSWarner Losh 
186*8e3e3a7aSWarner Losh static int check_next1 (LexState *ls, int c) {
187*8e3e3a7aSWarner Losh   if (ls->current == c) {
188*8e3e3a7aSWarner Losh     next(ls);
189*8e3e3a7aSWarner Losh     return 1;
190*8e3e3a7aSWarner Losh   }
191*8e3e3a7aSWarner Losh   else return 0;
192*8e3e3a7aSWarner Losh }
193*8e3e3a7aSWarner Losh 
194*8e3e3a7aSWarner Losh 
195*8e3e3a7aSWarner Losh /*
196*8e3e3a7aSWarner Losh ** Check whether current char is in set 'set' (with two chars) and
197*8e3e3a7aSWarner Losh ** saves it
198*8e3e3a7aSWarner Losh */
199*8e3e3a7aSWarner Losh static int check_next2 (LexState *ls, const char *set) {
200*8e3e3a7aSWarner Losh   lua_assert(set[2] == '\0');
201*8e3e3a7aSWarner Losh   if (ls->current == set[0] || ls->current == set[1]) {
202*8e3e3a7aSWarner Losh     save_and_next(ls);
203*8e3e3a7aSWarner Losh     return 1;
204*8e3e3a7aSWarner Losh   }
205*8e3e3a7aSWarner Losh   else return 0;
206*8e3e3a7aSWarner Losh }
207*8e3e3a7aSWarner Losh 
208*8e3e3a7aSWarner Losh 
209*8e3e3a7aSWarner Losh /* LUA_NUMBER */
210*8e3e3a7aSWarner Losh /*
211*8e3e3a7aSWarner Losh ** this function is quite liberal in what it accepts, as 'luaO_str2num'
212*8e3e3a7aSWarner Losh ** will reject ill-formed numerals.
213*8e3e3a7aSWarner Losh */
214*8e3e3a7aSWarner Losh static int read_numeral (LexState *ls, SemInfo *seminfo) {
215*8e3e3a7aSWarner Losh   TValue obj;
216*8e3e3a7aSWarner Losh   const char *expo = "Ee";
217*8e3e3a7aSWarner Losh   int first = ls->current;
218*8e3e3a7aSWarner Losh   lua_assert(lisdigit(ls->current));
219*8e3e3a7aSWarner Losh   save_and_next(ls);
220*8e3e3a7aSWarner Losh   if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
221*8e3e3a7aSWarner Losh     expo = "Pp";
222*8e3e3a7aSWarner Losh   for (;;) {
223*8e3e3a7aSWarner Losh     if (check_next2(ls, expo))  /* exponent part? */
224*8e3e3a7aSWarner Losh       check_next2(ls, "-+");  /* optional exponent sign */
225*8e3e3a7aSWarner Losh     if (lisxdigit(ls->current))
226*8e3e3a7aSWarner Losh       save_and_next(ls);
227*8e3e3a7aSWarner Losh     else if (ls->current == '.')
228*8e3e3a7aSWarner Losh       save_and_next(ls);
229*8e3e3a7aSWarner Losh     else break;
230*8e3e3a7aSWarner Losh   }
231*8e3e3a7aSWarner Losh   save(ls, '\0');
232*8e3e3a7aSWarner Losh   if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
233*8e3e3a7aSWarner Losh     lexerror(ls, "malformed number", TK_FLT);
234*8e3e3a7aSWarner Losh   if (ttisinteger(&obj)) {
235*8e3e3a7aSWarner Losh     seminfo->i = ivalue(&obj);
236*8e3e3a7aSWarner Losh     return TK_INT;
237*8e3e3a7aSWarner Losh   }
238*8e3e3a7aSWarner Losh   else {
239*8e3e3a7aSWarner Losh     lua_assert(ttisfloat(&obj));
240*8e3e3a7aSWarner Losh     seminfo->r = fltvalue(&obj);
241*8e3e3a7aSWarner Losh     return TK_FLT;
242*8e3e3a7aSWarner Losh   }
243*8e3e3a7aSWarner Losh }
244*8e3e3a7aSWarner Losh 
245*8e3e3a7aSWarner Losh 
246*8e3e3a7aSWarner Losh /*
247*8e3e3a7aSWarner Losh ** skip a sequence '[=*[' or ']=*]'; if sequence is well formed, return
248*8e3e3a7aSWarner Losh ** its number of '='s; otherwise, return a negative number (-1 iff there
249*8e3e3a7aSWarner Losh ** are no '='s after initial bracket)
250*8e3e3a7aSWarner Losh */
251*8e3e3a7aSWarner Losh static int skip_sep (LexState *ls) {
252*8e3e3a7aSWarner Losh   int count = 0;
253*8e3e3a7aSWarner Losh   int s = ls->current;
254*8e3e3a7aSWarner Losh   lua_assert(s == '[' || s == ']');
255*8e3e3a7aSWarner Losh   save_and_next(ls);
256*8e3e3a7aSWarner Losh   while (ls->current == '=') {
257*8e3e3a7aSWarner Losh     save_and_next(ls);
258*8e3e3a7aSWarner Losh     count++;
259*8e3e3a7aSWarner Losh   }
260*8e3e3a7aSWarner Losh   return (ls->current == s) ? count : (-count) - 1;
261*8e3e3a7aSWarner Losh }
262*8e3e3a7aSWarner Losh 
263*8e3e3a7aSWarner Losh 
264*8e3e3a7aSWarner Losh static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
265*8e3e3a7aSWarner Losh   int line = ls->linenumber;  /* initial line (for error message) */
266*8e3e3a7aSWarner Losh   save_and_next(ls);  /* skip 2nd '[' */
267*8e3e3a7aSWarner Losh   if (currIsNewline(ls))  /* string starts with a newline? */
268*8e3e3a7aSWarner Losh     inclinenumber(ls);  /* skip it */
269*8e3e3a7aSWarner Losh   for (;;) {
270*8e3e3a7aSWarner Losh     switch (ls->current) {
271*8e3e3a7aSWarner Losh       case EOZ: {  /* error */
272*8e3e3a7aSWarner Losh         const char *what = (seminfo ? "string" : "comment");
273*8e3e3a7aSWarner Losh         const char *msg = luaO_pushfstring(ls->L,
274*8e3e3a7aSWarner Losh                      "unfinished long %s (starting at line %d)", what, line);
275*8e3e3a7aSWarner Losh         lexerror(ls, msg, TK_EOS);
276*8e3e3a7aSWarner Losh         break;  /* to avoid warnings */
277*8e3e3a7aSWarner Losh       }
278*8e3e3a7aSWarner Losh       case ']': {
279*8e3e3a7aSWarner Losh         if (skip_sep(ls) == sep) {
280*8e3e3a7aSWarner Losh           save_and_next(ls);  /* skip 2nd ']' */
281*8e3e3a7aSWarner Losh           goto endloop;
282*8e3e3a7aSWarner Losh         }
283*8e3e3a7aSWarner Losh         break;
284*8e3e3a7aSWarner Losh       }
285*8e3e3a7aSWarner Losh       case '\n': case '\r': {
286*8e3e3a7aSWarner Losh         save(ls, '\n');
287*8e3e3a7aSWarner Losh         inclinenumber(ls);
288*8e3e3a7aSWarner Losh         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
289*8e3e3a7aSWarner Losh         break;
290*8e3e3a7aSWarner Losh       }
291*8e3e3a7aSWarner Losh       default: {
292*8e3e3a7aSWarner Losh         if (seminfo) save_and_next(ls);
293*8e3e3a7aSWarner Losh         else next(ls);
294*8e3e3a7aSWarner Losh       }
295*8e3e3a7aSWarner Losh     }
296*8e3e3a7aSWarner Losh   } endloop:
297*8e3e3a7aSWarner Losh   if (seminfo)
298*8e3e3a7aSWarner Losh     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
299*8e3e3a7aSWarner Losh                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
300*8e3e3a7aSWarner Losh }
301*8e3e3a7aSWarner Losh 
302*8e3e3a7aSWarner Losh 
303*8e3e3a7aSWarner Losh static void esccheck (LexState *ls, int c, const char *msg) {
304*8e3e3a7aSWarner Losh   if (!c) {
305*8e3e3a7aSWarner Losh     if (ls->current != EOZ)
306*8e3e3a7aSWarner Losh       save_and_next(ls);  /* add current to buffer for error message */
307*8e3e3a7aSWarner Losh     lexerror(ls, msg, TK_STRING);
308*8e3e3a7aSWarner Losh   }
309*8e3e3a7aSWarner Losh }
310*8e3e3a7aSWarner Losh 
311*8e3e3a7aSWarner Losh 
312*8e3e3a7aSWarner Losh static int gethexa (LexState *ls) {
313*8e3e3a7aSWarner Losh   save_and_next(ls);
314*8e3e3a7aSWarner Losh   esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
315*8e3e3a7aSWarner Losh   return luaO_hexavalue(ls->current);
316*8e3e3a7aSWarner Losh }
317*8e3e3a7aSWarner Losh 
318*8e3e3a7aSWarner Losh 
319*8e3e3a7aSWarner Losh static int readhexaesc (LexState *ls) {
320*8e3e3a7aSWarner Losh   int r = gethexa(ls);
321*8e3e3a7aSWarner Losh   r = (r << 4) + gethexa(ls);
322*8e3e3a7aSWarner Losh   luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
323*8e3e3a7aSWarner Losh   return r;
324*8e3e3a7aSWarner Losh }
325*8e3e3a7aSWarner Losh 
326*8e3e3a7aSWarner Losh 
327*8e3e3a7aSWarner Losh static unsigned long readutf8esc (LexState *ls) {
328*8e3e3a7aSWarner Losh   unsigned long r;
329*8e3e3a7aSWarner Losh   int i = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
330*8e3e3a7aSWarner Losh   save_and_next(ls);  /* skip 'u' */
331*8e3e3a7aSWarner Losh   esccheck(ls, ls->current == '{', "missing '{'");
332*8e3e3a7aSWarner Losh   r = gethexa(ls);  /* must have at least one digit */
333*8e3e3a7aSWarner Losh   while ((save_and_next(ls), lisxdigit(ls->current))) {
334*8e3e3a7aSWarner Losh     i++;
335*8e3e3a7aSWarner Losh     r = (r << 4) + luaO_hexavalue(ls->current);
336*8e3e3a7aSWarner Losh     esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
337*8e3e3a7aSWarner Losh   }
338*8e3e3a7aSWarner Losh   esccheck(ls, ls->current == '}', "missing '}'");
339*8e3e3a7aSWarner Losh   next(ls);  /* skip '}' */
340*8e3e3a7aSWarner Losh   luaZ_buffremove(ls->buff, i);  /* remove saved chars from buffer */
341*8e3e3a7aSWarner Losh   return r;
342*8e3e3a7aSWarner Losh }
343*8e3e3a7aSWarner Losh 
344*8e3e3a7aSWarner Losh 
345*8e3e3a7aSWarner Losh static void utf8esc (LexState *ls) {
346*8e3e3a7aSWarner Losh   char buff[UTF8BUFFSZ];
347*8e3e3a7aSWarner Losh   int n = luaO_utf8esc(buff, readutf8esc(ls));
348*8e3e3a7aSWarner Losh   for (; n > 0; n--)  /* add 'buff' to string */
349*8e3e3a7aSWarner Losh     save(ls, buff[UTF8BUFFSZ - n]);
350*8e3e3a7aSWarner Losh }
351*8e3e3a7aSWarner Losh 
352*8e3e3a7aSWarner Losh 
353*8e3e3a7aSWarner Losh static int readdecesc (LexState *ls) {
354*8e3e3a7aSWarner Losh   int i;
355*8e3e3a7aSWarner Losh   int r = 0;  /* result accumulator */
356*8e3e3a7aSWarner Losh   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
357*8e3e3a7aSWarner Losh     r = 10*r + ls->current - '0';
358*8e3e3a7aSWarner Losh     save_and_next(ls);
359*8e3e3a7aSWarner Losh   }
360*8e3e3a7aSWarner Losh   esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
361*8e3e3a7aSWarner Losh   luaZ_buffremove(ls->buff, i);  /* remove read digits from buffer */
362*8e3e3a7aSWarner Losh   return r;
363*8e3e3a7aSWarner Losh }
364*8e3e3a7aSWarner Losh 
365*8e3e3a7aSWarner Losh 
366*8e3e3a7aSWarner Losh static void read_string (LexState *ls, int del, SemInfo *seminfo) {
367*8e3e3a7aSWarner Losh   save_and_next(ls);  /* keep delimiter (for error messages) */
368*8e3e3a7aSWarner Losh   while (ls->current != del) {
369*8e3e3a7aSWarner Losh     switch (ls->current) {
370*8e3e3a7aSWarner Losh       case EOZ:
371*8e3e3a7aSWarner Losh         lexerror(ls, "unfinished string", TK_EOS);
372*8e3e3a7aSWarner Losh         break;  /* to avoid warnings */
373*8e3e3a7aSWarner Losh       case '\n':
374*8e3e3a7aSWarner Losh       case '\r':
375*8e3e3a7aSWarner Losh         lexerror(ls, "unfinished string", TK_STRING);
376*8e3e3a7aSWarner Losh         break;  /* to avoid warnings */
377*8e3e3a7aSWarner Losh       case '\\': {  /* escape sequences */
378*8e3e3a7aSWarner Losh         int c;  /* final character to be saved */
379*8e3e3a7aSWarner Losh         save_and_next(ls);  /* keep '\\' for error messages */
380*8e3e3a7aSWarner Losh         switch (ls->current) {
381*8e3e3a7aSWarner Losh           case 'a': c = '\a'; goto read_save;
382*8e3e3a7aSWarner Losh           case 'b': c = '\b'; goto read_save;
383*8e3e3a7aSWarner Losh           case 'f': c = '\f'; goto read_save;
384*8e3e3a7aSWarner Losh           case 'n': c = '\n'; goto read_save;
385*8e3e3a7aSWarner Losh           case 'r': c = '\r'; goto read_save;
386*8e3e3a7aSWarner Losh           case 't': c = '\t'; goto read_save;
387*8e3e3a7aSWarner Losh           case 'v': c = '\v'; goto read_save;
388*8e3e3a7aSWarner Losh           case 'x': c = readhexaesc(ls); goto read_save;
389*8e3e3a7aSWarner Losh           case 'u': utf8esc(ls);  goto no_save;
390*8e3e3a7aSWarner Losh           case '\n': case '\r':
391*8e3e3a7aSWarner Losh             inclinenumber(ls); c = '\n'; goto only_save;
392*8e3e3a7aSWarner Losh           case '\\': case '\"': case '\'':
393*8e3e3a7aSWarner Losh             c = ls->current; goto read_save;
394*8e3e3a7aSWarner Losh           case EOZ: goto no_save;  /* will raise an error next loop */
395*8e3e3a7aSWarner Losh           case 'z': {  /* zap following span of spaces */
396*8e3e3a7aSWarner Losh             luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
397*8e3e3a7aSWarner Losh             next(ls);  /* skip the 'z' */
398*8e3e3a7aSWarner Losh             while (lisspace(ls->current)) {
399*8e3e3a7aSWarner Losh               if (currIsNewline(ls)) inclinenumber(ls);
400*8e3e3a7aSWarner Losh               else next(ls);
401*8e3e3a7aSWarner Losh             }
402*8e3e3a7aSWarner Losh             goto no_save;
403*8e3e3a7aSWarner Losh           }
404*8e3e3a7aSWarner Losh           default: {
405*8e3e3a7aSWarner Losh             esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
406*8e3e3a7aSWarner Losh             c = readdecesc(ls);  /* digital escape '\ddd' */
407*8e3e3a7aSWarner Losh             goto only_save;
408*8e3e3a7aSWarner Losh           }
409*8e3e3a7aSWarner Losh         }
410*8e3e3a7aSWarner Losh        read_save:
411*8e3e3a7aSWarner Losh          next(ls);
412*8e3e3a7aSWarner Losh          /* go through */
413*8e3e3a7aSWarner Losh        only_save:
414*8e3e3a7aSWarner Losh          luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
415*8e3e3a7aSWarner Losh          save(ls, c);
416*8e3e3a7aSWarner Losh          /* go through */
417*8e3e3a7aSWarner Losh        no_save: break;
418*8e3e3a7aSWarner Losh       }
419*8e3e3a7aSWarner Losh       default:
420*8e3e3a7aSWarner Losh         save_and_next(ls);
421*8e3e3a7aSWarner Losh     }
422*8e3e3a7aSWarner Losh   }
423*8e3e3a7aSWarner Losh   save_and_next(ls);  /* skip delimiter */
424*8e3e3a7aSWarner Losh   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
425*8e3e3a7aSWarner Losh                                    luaZ_bufflen(ls->buff) - 2);
426*8e3e3a7aSWarner Losh }
427*8e3e3a7aSWarner Losh 
428*8e3e3a7aSWarner Losh 
429*8e3e3a7aSWarner Losh static int llex (LexState *ls, SemInfo *seminfo) {
430*8e3e3a7aSWarner Losh   luaZ_resetbuffer(ls->buff);
431*8e3e3a7aSWarner Losh   for (;;) {
432*8e3e3a7aSWarner Losh     switch (ls->current) {
433*8e3e3a7aSWarner Losh       case '\n': case '\r': {  /* line breaks */
434*8e3e3a7aSWarner Losh         inclinenumber(ls);
435*8e3e3a7aSWarner Losh         break;
436*8e3e3a7aSWarner Losh       }
437*8e3e3a7aSWarner Losh       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
438*8e3e3a7aSWarner Losh         next(ls);
439*8e3e3a7aSWarner Losh         break;
440*8e3e3a7aSWarner Losh       }
441*8e3e3a7aSWarner Losh       case '-': {  /* '-' or '--' (comment) */
442*8e3e3a7aSWarner Losh         next(ls);
443*8e3e3a7aSWarner Losh         if (ls->current != '-') return '-';
444*8e3e3a7aSWarner Losh         /* else is a comment */
445*8e3e3a7aSWarner Losh         next(ls);
446*8e3e3a7aSWarner Losh         if (ls->current == '[') {  /* long comment? */
447*8e3e3a7aSWarner Losh           int sep = skip_sep(ls);
448*8e3e3a7aSWarner Losh           luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
449*8e3e3a7aSWarner Losh           if (sep >= 0) {
450*8e3e3a7aSWarner Losh             read_long_string(ls, NULL, sep);  /* skip long comment */
451*8e3e3a7aSWarner Losh             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
452*8e3e3a7aSWarner Losh             break;
453*8e3e3a7aSWarner Losh           }
454*8e3e3a7aSWarner Losh         }
455*8e3e3a7aSWarner Losh         /* else short comment */
456*8e3e3a7aSWarner Losh         while (!currIsNewline(ls) && ls->current != EOZ)
457*8e3e3a7aSWarner Losh           next(ls);  /* skip until end of line (or end of file) */
458*8e3e3a7aSWarner Losh         break;
459*8e3e3a7aSWarner Losh       }
460*8e3e3a7aSWarner Losh       case '[': {  /* long string or simply '[' */
461*8e3e3a7aSWarner Losh         int sep = skip_sep(ls);
462*8e3e3a7aSWarner Losh         if (sep >= 0) {
463*8e3e3a7aSWarner Losh           read_long_string(ls, seminfo, sep);
464*8e3e3a7aSWarner Losh           return TK_STRING;
465*8e3e3a7aSWarner Losh         }
466*8e3e3a7aSWarner Losh         else if (sep != -1)  /* '[=...' missing second bracket */
467*8e3e3a7aSWarner Losh           lexerror(ls, "invalid long string delimiter", TK_STRING);
468*8e3e3a7aSWarner Losh         return '[';
469*8e3e3a7aSWarner Losh       }
470*8e3e3a7aSWarner Losh       case '=': {
471*8e3e3a7aSWarner Losh         next(ls);
472*8e3e3a7aSWarner Losh         if (check_next1(ls, '=')) return TK_EQ;
473*8e3e3a7aSWarner Losh         else return '=';
474*8e3e3a7aSWarner Losh       }
475*8e3e3a7aSWarner Losh       case '<': {
476*8e3e3a7aSWarner Losh         next(ls);
477*8e3e3a7aSWarner Losh         if (check_next1(ls, '=')) return TK_LE;
478*8e3e3a7aSWarner Losh         else if (check_next1(ls, '<')) return TK_SHL;
479*8e3e3a7aSWarner Losh         else return '<';
480*8e3e3a7aSWarner Losh       }
481*8e3e3a7aSWarner Losh       case '>': {
482*8e3e3a7aSWarner Losh         next(ls);
483*8e3e3a7aSWarner Losh         if (check_next1(ls, '=')) return TK_GE;
484*8e3e3a7aSWarner Losh         else if (check_next1(ls, '>')) return TK_SHR;
485*8e3e3a7aSWarner Losh         else return '>';
486*8e3e3a7aSWarner Losh       }
487*8e3e3a7aSWarner Losh       case '/': {
488*8e3e3a7aSWarner Losh         next(ls);
489*8e3e3a7aSWarner Losh         if (check_next1(ls, '/')) return TK_IDIV;
490*8e3e3a7aSWarner Losh         else return '/';
491*8e3e3a7aSWarner Losh       }
492*8e3e3a7aSWarner Losh       case '~': {
493*8e3e3a7aSWarner Losh         next(ls);
494*8e3e3a7aSWarner Losh         if (check_next1(ls, '=')) return TK_NE;
495*8e3e3a7aSWarner Losh         else return '~';
496*8e3e3a7aSWarner Losh       }
497*8e3e3a7aSWarner Losh       case ':': {
498*8e3e3a7aSWarner Losh         next(ls);
499*8e3e3a7aSWarner Losh         if (check_next1(ls, ':')) return TK_DBCOLON;
500*8e3e3a7aSWarner Losh         else return ':';
501*8e3e3a7aSWarner Losh       }
502*8e3e3a7aSWarner Losh       case '"': case '\'': {  /* short literal strings */
503*8e3e3a7aSWarner Losh         read_string(ls, ls->current, seminfo);
504*8e3e3a7aSWarner Losh         return TK_STRING;
505*8e3e3a7aSWarner Losh       }
506*8e3e3a7aSWarner Losh       case '.': {  /* '.', '..', '...', or number */
507*8e3e3a7aSWarner Losh         save_and_next(ls);
508*8e3e3a7aSWarner Losh         if (check_next1(ls, '.')) {
509*8e3e3a7aSWarner Losh           if (check_next1(ls, '.'))
510*8e3e3a7aSWarner Losh             return TK_DOTS;   /* '...' */
511*8e3e3a7aSWarner Losh           else return TK_CONCAT;   /* '..' */
512*8e3e3a7aSWarner Losh         }
513*8e3e3a7aSWarner Losh         else if (!lisdigit(ls->current)) return '.';
514*8e3e3a7aSWarner Losh         else return read_numeral(ls, seminfo);
515*8e3e3a7aSWarner Losh       }
516*8e3e3a7aSWarner Losh       case '0': case '1': case '2': case '3': case '4':
517*8e3e3a7aSWarner Losh       case '5': case '6': case '7': case '8': case '9': {
518*8e3e3a7aSWarner Losh         return read_numeral(ls, seminfo);
519*8e3e3a7aSWarner Losh       }
520*8e3e3a7aSWarner Losh       case EOZ: {
521*8e3e3a7aSWarner Losh         return TK_EOS;
522*8e3e3a7aSWarner Losh       }
523*8e3e3a7aSWarner Losh       default: {
524*8e3e3a7aSWarner Losh         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
525*8e3e3a7aSWarner Losh           TString *ts;
526*8e3e3a7aSWarner Losh           do {
527*8e3e3a7aSWarner Losh             save_and_next(ls);
528*8e3e3a7aSWarner Losh           } while (lislalnum(ls->current));
529*8e3e3a7aSWarner Losh           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
530*8e3e3a7aSWarner Losh                                   luaZ_bufflen(ls->buff));
531*8e3e3a7aSWarner Losh           seminfo->ts = ts;
532*8e3e3a7aSWarner Losh           if (isreserved(ts))  /* reserved word? */
533*8e3e3a7aSWarner Losh             return ts->extra - 1 + FIRST_RESERVED;
534*8e3e3a7aSWarner Losh           else {
535*8e3e3a7aSWarner Losh             return TK_NAME;
536*8e3e3a7aSWarner Losh           }
537*8e3e3a7aSWarner Losh         }
538*8e3e3a7aSWarner Losh         else {  /* single-char tokens (+ - / ...) */
539*8e3e3a7aSWarner Losh           int c = ls->current;
540*8e3e3a7aSWarner Losh           next(ls);
541*8e3e3a7aSWarner Losh           return c;
542*8e3e3a7aSWarner Losh         }
543*8e3e3a7aSWarner Losh       }
544*8e3e3a7aSWarner Losh     }
545*8e3e3a7aSWarner Losh   }
546*8e3e3a7aSWarner Losh }
547*8e3e3a7aSWarner Losh 
548*8e3e3a7aSWarner Losh 
549*8e3e3a7aSWarner Losh void luaX_next (LexState *ls) {
550*8e3e3a7aSWarner Losh   ls->lastline = ls->linenumber;
551*8e3e3a7aSWarner Losh   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
552*8e3e3a7aSWarner Losh     ls->t = ls->lookahead;  /* use this one */
553*8e3e3a7aSWarner Losh     ls->lookahead.token = TK_EOS;  /* and discharge it */
554*8e3e3a7aSWarner Losh   }
555*8e3e3a7aSWarner Losh   else
556*8e3e3a7aSWarner Losh     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
557*8e3e3a7aSWarner Losh }
558*8e3e3a7aSWarner Losh 
559*8e3e3a7aSWarner Losh 
560*8e3e3a7aSWarner Losh int luaX_lookahead (LexState *ls) {
561*8e3e3a7aSWarner Losh   lua_assert(ls->lookahead.token == TK_EOS);
562*8e3e3a7aSWarner Losh   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
563*8e3e3a7aSWarner Losh   return ls->lookahead.token;
564*8e3e3a7aSWarner Losh }
565*8e3e3a7aSWarner Losh 
566