xref: /linux/scripts/genksyms/lex.l (revision 15a1fbdcfb519c2bd291ed01c6c94e0b89537a77)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * Lexical analysis for genksyms.
4  * Copyright 1996, 1997 Linux International.
5  *
6  * New implementation contributed by Richard Henderson <rth@tamu.edu>
7  * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
8  *
9  * Taken from Linux modutils 2.4.22.
10  */
11 
12 %{
13 
14 #include <limits.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <ctype.h>
18 
19 #include "genksyms.h"
20 #include "parse.tab.h"
21 
22 /* We've got a two-level lexer here.  We let flex do basic tokenization
23    and then we categorize those basic tokens in the second stage.  */
/* YY_DECL is the flex hook that sets the prototype of the generated
   scanner.  Here the scanner becomes the file-local yylex1(), which is
   called by the yylex() defined in the user-code section below.  */
24 #define YY_DECL		static int yylex1(void)
25 
26 %}
27 
/* Identifiers: letters, digits and underscore, plus the dollar sign.  */
28 IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*
29 
/* Integer constants: octal, decimal or hexadecimal, optionally followed
   by one U and/or one L suffix.
   NOTE(review): I_SUF has no LL/ULL forms, so a constant such as 1ULL is
   split into an INT followed by an IDENT.  Changing that would change the
   text of the recorded tokens - confirm the effect before touching it.  */
30 O_INT			0[0-7]*
31 D_INT			[1-9][0-9]*
32 X_INT			0[Xx][0-9A-Fa-f]+
33 I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
34 INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?
35 
/* Floating constants: either a fraction with optional exponent, or
   digits with a mandatory exponent; an optional F or L suffix follows.  */
36 FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
37 EXP			[Ee][+-]?[0-9]+
38 F_SUF			[FfLl]
39 REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
40 
/* String and character literals with an optional L prefix.  A backslash
   consumes the following character, so an escaped quote does not end
   the literal.  */
41 STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
42 CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'
43 
/* Two-character operators: one of the listed operator characters followed
   by an equals sign, or one of && || -> << >>.
   NOTE(review): != ++ and -- are not in this set and are therefore
   returned as two single-character tokens by the catch-all rule.  */
44 MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
45 
46 /* We don't do multiple input files.  */
47 %option noyywrap
48 
/* Do not generate the input() routine; this file never calls it.  */
49 %option noinput
50 
51 %%
52 
53 
54  /* Keep track of our location in the original source files.  */
 /* A preprocessor line marker (# <number> <quoted file name> ...) is
    returned as FILENAME; yylex() then extracts the file name and the line
    number from yytext.  Any other line starting with # and every plain
    newline only advance cur_line.  */
55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
56 ^#.*\n					cur_line++;
57 \n					cur_line++;
58 
59  /* Ignore all other whitespace.  */
60 [ \t\f\v\r]+				;
61 
62 
 /* First-stage token classes.  yylex() refines IDENT further (keyword
    and typedef lookup) and groups tokens into phrases.  */
63 {STRING}				return STRING;
64 {CHAR}					return CHAR;
65 {IDENT}					return IDENT;
66 
67  /* The Pedant requires that the other C multi-character tokens be
68     recognized as tokens.  We don't actually use them since we don't
69     parse expressions, but we do want whitespace to be arranged
70     around them properly.  */
71 {MC_TOKEN}				return OTHER;
72 {INT}					return INT;
73 {REAL}					return REAL;
74 
 /* The ellipsis wins over the catch-all rule below because flex prefers
    the longest match.  */
75 "..."					return DOTS;
76 
77  /* All other tokens are single characters.  */
 /* The character itself is used as the token code.  */
78 .					return yytext[0];
79 
80 
81 %%
82 
83 /* Bring in the keyword recognizer.  */
84 
85 #include "keywords.c"
86 
87 
88 /* Macros to append to our phrase collection list.  */
89 
/*
 * _APP(T, L) - append one token to the phrase list.
 *
 * T is the token text and L its length without the terminating NUL.
 * L + 1 bytes are copied, so T[L] must be that NUL.  L is expanded twice,
 * so it must not have side effects.
 *
 * The macro works on variables of the expanding scope: it fills in the
 * spare node `next_node`, leaves it in `cur_node`, and allocates a new
 * spare node whose `next` points back at the node just filled in.  The
 * list is therefore linked from the newest token to the oldest.  It also
 * reads the global `in_source_file`.
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case the only effect is that the ABI checksums become
 * more volatile, which is acceptable.  Also, such collisions are quite
 * rare; so far one was only observed in include/linux/telephony.h.
 */
#define _APP(T,L)	do {						     \
			  cur_node = next_node;				     \
			  next_node = xmalloc(sizeof(*next_node));	     \
			  next_node->next = cur_node;			     \
			  cur_node->string = memcpy(xmalloc((L) + 1), (T),   \
						    (L) + 1);		     \
			  cur_node->tag =				     \
			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1) \
			    ? SYM_ENUM_CONST : SYM_NORMAL;		     \
			  cur_node->in_source_file = in_source_file;	     \
			} while (0)

/* Append the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)
111 
112 
113 /* The second stage lexer.  Here we incorporate knowledge of the state
114    of the parser to tailor the tokens that are returned.  */
115 
116 int
117 yylex(void)
118 {
119   static enum {
120     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
121     ST_BRACKET, ST_BRACE, ST_EXPRESSION,
122     ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
123     ST_TABLE_5, ST_TABLE_6
124   } lexstate = ST_NOTSTARTED;
125 
126   static int suppress_type_lookup, dont_want_brace_phrase;
127   static struct string_list *next_node;
128 
129   int token, count = 0;
130   struct string_list *cur_node;
131 
132   if (lexstate == ST_NOTSTARTED)
133     {
134       next_node = xmalloc(sizeof(*next_node));
135       next_node->next = NULL;
136       lexstate = ST_NORMAL;
137     }
138 
139 repeat:
140   token = yylex1();
141 
142   if (token == 0)
143     return 0;
144   else if (token == FILENAME)
145     {
146       char *file, *e;
147 
148       /* Save the filename and line number for later error messages.  */
149 
150       if (cur_filename)
151 	free(cur_filename);
152 
153       file = strchr(yytext, '\"')+1;
154       e = strchr(file, '\"');
155       *e = '\0';
156       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
157       cur_line = atoi(yytext+2);
158 
159       if (!source_file) {
160         source_file = xstrdup(cur_filename);
161         in_source_file = 1;
162       } else {
163         in_source_file = (strcmp(cur_filename, source_file) == 0);
164       }
165 
166       goto repeat;
167     }
168 
169   switch (lexstate)
170     {
171     case ST_NORMAL:
172       switch (token)
173 	{
174 	case IDENT:
175 	  APP;
176 	  {
177 	    int r = is_reserved_word(yytext, yyleng);
178 	    if (r >= 0)
179 	      {
180 		switch (token = r)
181 		  {
182 		  case ATTRIBUTE_KEYW:
183 		    lexstate = ST_ATTRIBUTE;
184 		    count = 0;
185 		    goto repeat;
186 		  case ASM_KEYW:
187 		    lexstate = ST_ASM;
188 		    count = 0;
189 		    goto repeat;
190 		  case TYPEOF_KEYW:
191 		    lexstate = ST_TYPEOF;
192 		    count = 0;
193 		    goto repeat;
194 
195 		  case STRUCT_KEYW:
196 		  case UNION_KEYW:
197 		  case ENUM_KEYW:
198 		    dont_want_brace_phrase = 3;
199 		    suppress_type_lookup = 2;
200 		    goto fini;
201 
202 		  case EXPORT_SYMBOL_KEYW:
203 		      goto fini;
204 		  }
205 	      }
206 	    if (!suppress_type_lookup)
207 	      {
208 		if (find_symbol(yytext, SYM_TYPEDEF, 1))
209 		  token = TYPE;
210 	      }
211 	  }
212 	  break;
213 
214 	case '[':
215 	  APP;
216 	  lexstate = ST_BRACKET;
217 	  count = 1;
218 	  goto repeat;
219 
220 	case '{':
221 	  APP;
222 	  if (dont_want_brace_phrase)
223 	    break;
224 	  lexstate = ST_BRACE;
225 	  count = 1;
226 	  goto repeat;
227 
228 	case '=': case ':':
229 	  APP;
230 	  lexstate = ST_EXPRESSION;
231 	  break;
232 
233 	case DOTS:
234 	default:
235 	  APP;
236 	  break;
237 	}
238       break;
239 
240     case ST_ATTRIBUTE:
241       APP;
242       switch (token)
243 	{
244 	case '(':
245 	  ++count;
246 	  goto repeat;
247 	case ')':
248 	  if (--count == 0)
249 	    {
250 	      lexstate = ST_NORMAL;
251 	      token = ATTRIBUTE_PHRASE;
252 	      break;
253 	    }
254 	  goto repeat;
255 	default:
256 	  goto repeat;
257 	}
258       break;
259 
260     case ST_ASM:
261       APP;
262       switch (token)
263 	{
264 	case '(':
265 	  ++count;
266 	  goto repeat;
267 	case ')':
268 	  if (--count == 0)
269 	    {
270 	      lexstate = ST_NORMAL;
271 	      token = ASM_PHRASE;
272 	      break;
273 	    }
274 	  goto repeat;
275 	default:
276 	  goto repeat;
277 	}
278       break;
279 
280     case ST_TYPEOF_1:
281       if (token == IDENT)
282 	{
283 	  if (is_reserved_word(yytext, yyleng) >= 0
284 	      || find_symbol(yytext, SYM_TYPEDEF, 1))
285 	    {
286 	      yyless(0);
287 	      unput('(');
288 	      lexstate = ST_NORMAL;
289 	      token = TYPEOF_KEYW;
290 	      break;
291 	    }
292 	  _APP("(", 1);
293 	}
294 	lexstate = ST_TYPEOF;
295 	/* FALLTHRU */
296 
297     case ST_TYPEOF:
298       switch (token)
299 	{
300 	case '(':
301 	  if ( ++count == 1 )
302 	    lexstate = ST_TYPEOF_1;
303 	  else
304 	    APP;
305 	  goto repeat;
306 	case ')':
307 	  APP;
308 	  if (--count == 0)
309 	    {
310 	      lexstate = ST_NORMAL;
311 	      token = TYPEOF_PHRASE;
312 	      break;
313 	    }
314 	  goto repeat;
315 	default:
316 	  APP;
317 	  goto repeat;
318 	}
319       break;
320 
321     case ST_BRACKET:
322       APP;
323       switch (token)
324 	{
325 	case '[':
326 	  ++count;
327 	  goto repeat;
328 	case ']':
329 	  if (--count == 0)
330 	    {
331 	      lexstate = ST_NORMAL;
332 	      token = BRACKET_PHRASE;
333 	      break;
334 	    }
335 	  goto repeat;
336 	default:
337 	  goto repeat;
338 	}
339       break;
340 
341     case ST_BRACE:
342       APP;
343       switch (token)
344 	{
345 	case '{':
346 	  ++count;
347 	  goto repeat;
348 	case '}':
349 	  if (--count == 0)
350 	    {
351 	      lexstate = ST_NORMAL;
352 	      token = BRACE_PHRASE;
353 	      break;
354 	    }
355 	  goto repeat;
356 	default:
357 	  goto repeat;
358 	}
359       break;
360 
361     case ST_EXPRESSION:
362       switch (token)
363 	{
364 	case '(': case '[': case '{':
365 	  ++count;
366 	  APP;
367 	  goto repeat;
368 	case '}':
369 	  /* is this the last line of an enum declaration? */
370 	  if (count == 0)
371 	    {
372 	      /* Put back the token we just read so's we can find it again
373 		 after registering the expression.  */
374 	      unput(token);
375 
376 	      lexstate = ST_NORMAL;
377 	      token = EXPRESSION_PHRASE;
378 	      break;
379 	    }
380 	  /* FALLTHRU */
381 	case ')': case ']':
382 	  --count;
383 	  APP;
384 	  goto repeat;
385 	case ',': case ';':
386 	  if (count == 0)
387 	    {
388 	      /* Put back the token we just read so's we can find it again
389 		 after registering the expression.  */
390 	      unput(token);
391 
392 	      lexstate = ST_NORMAL;
393 	      token = EXPRESSION_PHRASE;
394 	      break;
395 	    }
396 	  APP;
397 	  goto repeat;
398 	default:
399 	  APP;
400 	  goto repeat;
401 	}
402       break;
403 
404     case ST_TABLE_1:
405       goto repeat;
406 
407     case ST_TABLE_2:
408       if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
409 	{
410 	  token = EXPORT_SYMBOL_KEYW;
411 	  lexstate = ST_TABLE_5;
412 	  APP;
413 	  break;
414 	}
415       lexstate = ST_TABLE_6;
416       /* FALLTHRU */
417 
418     case ST_TABLE_6:
419       switch (token)
420 	{
421 	case '{': case '[': case '(':
422 	  ++count;
423 	  break;
424 	case '}': case ']': case ')':
425 	  --count;
426 	  break;
427 	case ',':
428 	  if (count == 0)
429 	    lexstate = ST_TABLE_2;
430 	  break;
431 	};
432       goto repeat;
433 
434     case ST_TABLE_3:
435       goto repeat;
436 
437     case ST_TABLE_4:
438       if (token == ';')
439 	lexstate = ST_NORMAL;
440       goto repeat;
441 
442     case ST_TABLE_5:
443       switch (token)
444 	{
445 	case ',':
446 	  token = ';';
447 	  lexstate = ST_TABLE_2;
448 	  APP;
449 	  break;
450 	default:
451 	  APP;
452 	  break;
453 	}
454       break;
455 
456     default:
457       exit(1);
458     }
459 fini:
460 
461   if (suppress_type_lookup > 0)
462     --suppress_type_lookup;
463   if (dont_want_brace_phrase > 0)
464     --dont_want_brace_phrase;
465 
466   yylval = &next_node->next;
467 
468   return token;
469 }
470