xref: /linux/scripts/genksyms/lex.l (revision 4f3c8320c78cdd11c8fdd23c33787407f719322e)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * Lexical analysis for genksyms.
4  * Copyright 1996, 1997 Linux International.
5  *
6  * New implementation contributed by Richard Henderson <rth@tamu.edu>
7  * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
8  *
9  * Taken from Linux modutils 2.4.22.
10  */
11 
12 %{
13 
14 #include <limits.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <ctype.h>
18 
19 #include "genksyms.h"
20 #include "parse.tab.h"
21 
22 /* We've got a two-level lexer here.  We let flex do basic tokenization
23    and then we categorize those basic tokens in the second stage.  */
/* YY_DECL gives the flex-generated scanner the file-local name yylex1(),
   which leaves the name yylex free for the second-stage function defined
   in the user code section of this file.  */
24 #define YY_DECL		static int yylex1(void)
25 
26 %}
27 
/*
 * Named patterns used by the rules section.
 *
 *   IDENT     C identifier; a dollar sign is accepted wherever a letter is.
 *   O_INT, D_INT, X_INT
 *             octal, decimal and hexadecimal digit sequences.
 *   I_SUF     integer suffix: one U, one L, or one of each in either order.
 *             Suffixes with two L characters (LL, ULL) are not listed here.
 *   INT       any of the three integer forms followed by an optional I_SUF.
 *   FRAC, EXP, F_SUF, REAL
 *             floating constants: a fraction with an optional exponent, or
 *             plain digits with a mandatory exponent, followed by an
 *             optional F or L suffix.
 *   STRING, CHAR
 *             string and character literals with an optional L prefix; a
 *             backslash escapes the single character that follows it.
 *   MC_TOKEN  two-character operators: one of the listed operator
 *             characters followed by an equals sign, plus the doubled and
 *             arrow forms listed at the end of the pattern.
 */
28 IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*
29 
30 O_INT			0[0-7]*
31 D_INT			[1-9][0-9]*
32 X_INT			0[Xx][0-9A-Fa-f]+
33 I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
34 INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?
35 
36 FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
37 EXP			[Ee][+-]?[0-9]+
38 F_SUF			[FfLl]
39 REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
40 
41 STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
42 CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'
43 
44 MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
45 
46 /* We don't do multiple input files.  */
47 %option noyywrap
48 
49 %option noinput
50 
51 %%
52 
53 
 /* First-stage rules.  Summary:
      - a line of the form  # INT quoted-name ...  yields FILENAME and does
        not touch cur_line here;
      - any other line starting with # is dropped and counts as one line;
      - a newline counts as one line, other whitespace is discarded;
      - literals, identifiers, multi-character operators, numbers and the
        ellipsis yield STRING, CHAR, IDENT, OTHER, INT, REAL and DOTS;
      - any remaining character is returned as its own character code.
    No keyword recognition happens here: every word comes out as IDENT.
    The FILENAME rule and the generic # rule match the same text on a line
    marker; flex resolves equal-length matches in favour of the rule listed
    first, so the FILENAME rule has to stay above the generic one.  */
54  /* Keep track of our location in the original source files.  */
55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
56 ^#.*\n					cur_line++;
57 \n					cur_line++;
58 
59  /* Ignore all other whitespace.  */
60 [ \t\f\v\r]+				;
61 
62 
63 {STRING}				return STRING;
64 {CHAR}					return CHAR;
65 {IDENT}					return IDENT;
66 
67  /* The Pedant requires that the other C multi-character tokens be
68     recognized as tokens.  We don't actually use them since we don't
69     parse expressions, but we do want whitespace to be arranged
70     around them properly.  */
71 {MC_TOKEN}				return OTHER;
72 {INT}					return INT;
73 {REAL}					return REAL;
74 
75 "..."					return DOTS;
76 
77  /* All other tokens are single characters.  */
78 .					return yytext[0];
79 
80 
81 %%
82 
83 /* Bring in the keyword recognizer.  */
84 
85 #include "keywords.c"
86 
87 
88 /* Macros to append to our phrase collection list.  */
89 
90 /*
91  * We mark any token that equals a known enumerator as
92  * SYM_ENUM_CONST. The parser will change this for struct and union tags later;
93  * the only problem is struct and union members:
94  *    enum e { a, b }; struct s { int a, b; }
95  * but in this case, the only effect will be, that the ABI checksums become
96  * more volatile, which is acceptable. Also, such collisions are quite rare,
97  * so far it was only observed in include/linux/telephony.h.
98  */
/*
 * _APP(T, L) - record the token text T (L characters) in the phrase list.
 *
 * The macro must be expanded in a scope that provides cur_node and
 * next_node.  The current placeholder next_node becomes cur_node and
 * receives the data; a freshly allocated placeholder is then linked in
 * front of it (next_node->next = cur_node), so following the next links
 * walks from the newest node towards the oldest.
 *
 * L+1 bytes are copied from T, so T has to be readable for one byte past
 * its L characters (presumably the terminating NUL).  The tag becomes
 * SYM_ENUM_CONST when find_symbol() returns a non-zero result for the
 * copied string and SYM_NORMAL otherwise.  The current in_source_file
 * flag is stored in the node as well.
 *
 * APP is _APP applied to yytext and yyleng, i.e. to the text of the token
 * most recently matched by the flex scanner.
 */
99 #define _APP(T,L)	do {						   \
100 			  cur_node = next_node;				   \
101 			  next_node = xmalloc(sizeof(*next_node));	   \
102 			  next_node->next = cur_node;			   \
103 			  cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
104 			  cur_node->tag =				   \
105 			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
106 			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
107 			  cur_node->in_source_file = in_source_file;       \
108 			} while (0)
109 
110 #define APP		_APP(yytext, yyleng)
111 
112 
113 /* The second stage lexer.  Here we incorporate knowledge of the state
114    of the parser to tailor the tokens that are returned.  */
115 
/*
 * yylex() - token source for the parser, layered on top of yylex1().
 *
 * Each call pulls raw tokens from yylex1() until one parser token is
 * complete.  FILENAME tokens only update cur_filename, cur_line and
 * in_source_file and are never returned.  In ST_NORMAL, tokens are
 * returned one at a time, except for the ones that open a collecting
 * state.  The collecting states (ST_ATTRIBUTE, ST_ASM, ST_TYPEOF,
 * ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT) swallow a
 * whole group and return it as one of the *_PHRASE tokens.
 *
 * Token texts are appended to a string_list through APP and _APP.  Before
 * a non-zero token is returned, yylval is set to &next_node->next, the
 * address of the placeholder link that points at the most recently
 * appended node.
 *
 * Returns 0 as soon as yylex1() returns 0 (yylval is not updated on that
 * path), otherwise the possibly reclassified token code.  Calls exit(1)
 * if lexstate holds a value that is not handled by the switch.
 */
116 int
117 yylex(void)
118 {
  /* The static variables below keep their values between calls.  */
119   static enum {
120     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
121     ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
122     ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
123     ST_TABLE_5, ST_TABLE_6
124   } lexstate = ST_NOTSTARTED;
125 
  /* Countdowns armed by struct, union and enum.  Both are decremented once
     per returned token at the fini label.  */
126   static int suppress_type_lookup, dont_want_brace_phrase;
  /* Empty placeholder node at the front of the phrase list; see _APP.  */
127   static struct string_list *next_node;
128 
  /* count is the nesting depth of the group being collected.  It is a
     plain local, so it starts at 0 on every call.  cur_node is assigned
     by _APP.  */
129   int token, count = 0;
130   struct string_list *cur_node;
131 
  /* First call only: create the initial placeholder node.  */
132   if (lexstate == ST_NOTSTARTED)
133     {
134       next_node = xmalloc(sizeof(*next_node));
135       next_node->next = NULL;
136       lexstate = ST_NORMAL;
137     }
138 
139 repeat:
140   token = yylex1();
141 
  /* A zero token from yylex1() is passed straight through (flex uses 0
     for end of input).  */
142   if (token == 0)
143     return 0;
144   else if (token == FILENAME)
145     {
146       char *file, *e;
147 
148       /* Save the filename and line number for later error messages.  */
149 
150       if (cur_filename)
151 	free(cur_filename);
152 
      /* The FILENAME rule only matches text that contains a pair of
         double-quote characters, so both searches below find one.  The
         closing one is overwritten in place, which terminates the name
         inside yytext.  */
153       file = strchr(yytext, '\"')+1;
154       e = strchr(file, '\"');
155       *e = '\0';
156       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* yytext+2 skips the # and the first blank after it; atoi then reads
         the line number.  */
157       cur_line = atoi(yytext+2);
158 
      /* The first file name ever seen becomes source_file; later markers
         set in_source_file according to whether they name that file.  */
159       if (!source_file) {
160         source_file = xstrdup(cur_filename);
161         in_source_file = 1;
162       } else {
163         in_source_file = (strcmp(cur_filename, source_file) == 0);
164       }
165 
166       goto repeat;
167     }
168 
169   switch (lexstate)
170     {
171     case ST_NORMAL:
172       switch (token)
173 	{
174 	case IDENT:
	  /* The text is recorded first; the code below may still change
	     the token code or switch to a collecting state.  */
175 	  APP;
176 	  {
177 	    int r = is_reserved_word(yytext, yyleng);
178 	    if (r >= 0)
179 	      {
180 		switch (token = r)
181 		  {
		  /* These keywords start a phrase.  The keyword text has
		     already been appended and nothing is returned until the
		     phrase is complete.  */
182 		  case ATTRIBUTE_KEYW:
183 		    lexstate = ST_ATTRIBUTE;
184 		    count = 0;
185 		    goto repeat;
186 		  case ASM_KEYW:
187 		    lexstate = ST_ASM;
188 		    count = 0;
189 		    goto repeat;
190 		  case TYPEOF_KEYW:
191 		    lexstate = ST_TYPEOF;
192 		    count = 0;
193 		    goto repeat;
194 
		  /* The counters are decremented at fini for the keyword
		     itself as well.  The typedef lookup is therefore skipped
		     for the one token after the keyword, and a brace is
		     returned as a plain token if it is one of the two
		     tokens after the keyword.  */
195 		  case STRUCT_KEYW:
196 		  case UNION_KEYW:
197 		  case ENUM_KEYW:
198 		    dont_want_brace_phrase = 3;
199 		    suppress_type_lookup = 2;
200 		    goto fini;
201 
202 		  case EXPORT_SYMBOL_KEYW:
203 		      goto fini;
204 
205 		  case STATIC_ASSERT_KEYW:
206 		    lexstate = ST_STATIC_ASSERT;
207 		    count = 0;
208 		    goto repeat;
209 		  }
		/* Any other reserved word keeps token = r and continues
		   with the typedef lookup below.  */
210 	      }
211 	    if (!suppress_type_lookup)
212 	      {
213 		if (find_symbol(yytext, SYM_TYPEDEF, 1))
214 		  token = TYPE;
215 	      }
216 	  }
217 	  break;
218 
	/* Start of a BRACKET_PHRASE; count is 1 for this opening bracket.  */
219 	case '[':
220 	  APP;
221 	  lexstate = ST_BRACKET;
222 	  count = 1;
223 	  goto repeat;
224 
	/* While dont_want_brace_phrase is non-zero the brace is returned as
	   a plain token; otherwise everything up to the matching brace is
	   collapsed into a BRACE_PHRASE.  */
225 	case '{':
226 	  APP;
227 	  if (dont_want_brace_phrase)
228 	    break;
229 	  lexstate = ST_BRACE;
230 	  count = 1;
231 	  goto repeat;
232 
	/* The character itself is returned now; starting with the next call
	   the following tokens are gathered into an EXPRESSION_PHRASE.  */
233 	case '=': case ':':
234 	  APP;
235 	  lexstate = ST_EXPRESSION;
236 	  break;
237 
238 	case DOTS:
239 	default:
240 	  APP;
241 	  break;
242 	}
243       break;
244 
    /* ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE and ST_STATIC_ASSERT all
       work the same way: every token is appended, openers raise count,
       and the closer that brings count back to 0 ends the phrase.  */
245     case ST_ATTRIBUTE:
246       APP;
247       switch (token)
248 	{
249 	case '(':
250 	  ++count;
251 	  goto repeat;
252 	case ')':
253 	  if (--count == 0)
254 	    {
255 	      lexstate = ST_NORMAL;
256 	      token = ATTRIBUTE_PHRASE;
257 	      break;
258 	    }
259 	  goto repeat;
260 	default:
261 	  goto repeat;
262 	}
263       break;
264 
265     case ST_ASM:
266       APP;
267       switch (token)
268 	{
269 	case '(':
270 	  ++count;
271 	  goto repeat;
272 	case ')':
273 	  if (--count == 0)
274 	    {
275 	      lexstate = ST_NORMAL;
276 	      token = ASM_PHRASE;
277 	      break;
278 	    }
279 	  goto repeat;
280 	default:
281 	  goto repeat;
282 	}
283       break;
284 
    /* ST_TYPEOF_1 handles the first token after the outermost opening
       parenthesis of typeof, which was counted but not appended.  If that
       token is a reserved word or a known typedef, the identifier is
       handed back to the scanner with yyless(0), the parenthesis is pushed
       back in front of it, and TYPEOF_KEYW is returned so that the
       following tokens are processed in ST_NORMAL.  Otherwise collection
       of a TYPEOF_PHRASE continues in ST_TYPEOF.
       NOTE(review): the parenthesis that was held back is appended only
       when the token is an IDENT; for any other first token it is not
       added to the list.  Confirm that this is intended.  */
285     case ST_TYPEOF_1:
286       if (token == IDENT)
287 	{
288 	  if (is_reserved_word(yytext, yyleng) >= 0
289 	      || find_symbol(yytext, SYM_TYPEDEF, 1))
290 	    {
291 	      yyless(0);
292 	      unput('(');
293 	      lexstate = ST_NORMAL;
294 	      token = TYPEOF_KEYW;
295 	      break;
296 	    }
297 	  _APP("(", 1);
298 	}
299 	lexstate = ST_TYPEOF;
300 	/* FALLTHRU */
301 
302     case ST_TYPEOF:
303       switch (token)
304 	{
305 	case '(':
	  /* The outermost parenthesis is not appended here; ST_TYPEOF_1
	     decides what happens to it.  */
306 	  if ( ++count == 1 )
307 	    lexstate = ST_TYPEOF_1;
308 	  else
309 	    APP;
310 	  goto repeat;
311 	case ')':
312 	  APP;
313 	  if (--count == 0)
314 	    {
315 	      lexstate = ST_NORMAL;
316 	      token = TYPEOF_PHRASE;
317 	      break;
318 	    }
319 	  goto repeat;
320 	default:
321 	  APP;
322 	  goto repeat;
323 	}
324       break;
325 
326     case ST_BRACKET:
327       APP;
328       switch (token)
329 	{
330 	case '[':
331 	  ++count;
332 	  goto repeat;
333 	case ']':
334 	  if (--count == 0)
335 	    {
336 	      lexstate = ST_NORMAL;
337 	      token = BRACKET_PHRASE;
338 	      break;
339 	    }
340 	  goto repeat;
341 	default:
342 	  goto repeat;
343 	}
344       break;
345 
346     case ST_BRACE:
347       APP;
348       switch (token)
349 	{
350 	case '{':
351 	  ++count;
352 	  goto repeat;
353 	case '}':
354 	  if (--count == 0)
355 	    {
356 	      lexstate = ST_NORMAL;
357 	      token = BRACE_PHRASE;
358 	      break;
359 	    }
360 	  goto repeat;
361 	default:
362 	  goto repeat;
363 	}
364       break;
365 
    /* ST_EXPRESSION gathers tokens until a comma, a semicolon or a closing
       brace is seen at nesting depth 0.  That terminator is not appended;
       it is pushed back to the scanner so that the next call returns it
       from ST_NORMAL.  */
366     case ST_EXPRESSION:
367       switch (token)
368 	{
369 	case '(': case '[': case '{':
370 	  ++count;
371 	  APP;
372 	  goto repeat;
373 	case '}':
374 	  /* is this the last line of an enum declaration? */
375 	  if (count == 0)
376 	    {
377 	      /* Put back the token we just read so's we can find it again
378 		 after registering the expression.  */
379 	      unput(token);
380 
381 	      lexstate = ST_NORMAL;
382 	      token = EXPRESSION_PHRASE;
383 	      break;
384 	    }
385 	  /* FALLTHRU */
386 	case ')': case ']':
387 	  --count;
388 	  APP;
389 	  goto repeat;
390 	case ',': case ';':
391 	  if (count == 0)
392 	    {
393 	      /* Put back the token we just read so's we can find it again
394 		 after registering the expression.  */
395 	      unput(token);
396 
397 	      lexstate = ST_NORMAL;
398 	      token = EXPRESSION_PHRASE;
399 	      break;
400 	    }
401 	  APP;
402 	  goto repeat;
403 	default:
404 	  APP;
405 	  goto repeat;
406 	}
407       break;
408 
409     case ST_STATIC_ASSERT:
410       APP;
411       switch (token)
412 	{
413 	case '(':
414 	  ++count;
415 	  goto repeat;
416 	case ')':
417 	  if (--count == 0)
418 	    {
419 	      lexstate = ST_NORMAL;
420 	      token = STATIC_ASSERT_PHRASE;
421 	      break;
422 	    }
423 	  goto repeat;
424 	default:
425 	  goto repeat;
426 	}
427       break;
428 
    /* NOTE(review): lexstate is local to this function, and no statement
       outside the ST_TABLE_* cases assigns one of the ST_TABLE_* values to
       it.  The ST_TABLE_* cases below therefore appear to be unreachable.  */
429     case ST_TABLE_1:
430       goto repeat;
431 
432     case ST_TABLE_2:
433       if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
434 	{
435 	  token = EXPORT_SYMBOL_KEYW;
436 	  lexstate = ST_TABLE_5;
437 	  APP;
438 	  break;
439 	}
440       lexstate = ST_TABLE_6;
441       /* FALLTHRU */
442 
443     case ST_TABLE_6:
444       switch (token)
445 	{
446 	case '{': case '[': case '(':
447 	  ++count;
448 	  break;
449 	case '}': case ']': case ')':
450 	  --count;
451 	  break;
452 	case ',':
453 	  if (count == 0)
454 	    lexstate = ST_TABLE_2;
455 	  break;
456 	};
457       goto repeat;
458 
459     case ST_TABLE_3:
460       goto repeat;
461 
462     case ST_TABLE_4:
463       if (token == ';')
464 	lexstate = ST_NORMAL;
465       goto repeat;
466 
467     case ST_TABLE_5:
468       switch (token)
469 	{
470 	case ',':
471 	  token = ';';
472 	  lexstate = ST_TABLE_2;
473 	  APP;
474 	  break;
475 	default:
476 	  APP;
477 	  break;
478 	}
479       break;
480 
    /* ST_NOTSTARTED or a corrupted state value.  */
481     default:
482       exit(1);
483     }
484 fini:
485 
  /* One step of the struct, union and enum countdowns per returned token.  */
486   if (suppress_type_lookup > 0)
487     --suppress_type_lookup;
488   if (dont_want_brace_phrase > 0)
489     --dont_want_brace_phrase;
490 
  /* Hand the parser the address of the placeholder link, which points at
     the most recently appended node.  */
491   yylval = &next_node->next;
492 
493   return token;
494 }
495