/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL		static int yylex1(void)

%}

/* Identifiers; '$' is accepted anywhere a letter or underscore is. */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hex, with an optional suffix that
   is one of u, l, ul or lu (either case). */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: a fraction with optional exponent, or digits with a
   mandatory exponent; either form may carry an f/l suffix. */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally L-prefixed.  A backslash
   always consumes the character that follows it. */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators.  Operators that are not listed here (for
   example "!=", "++" and "--") are not matched as a unit and reach the
   second stage one character at a time through the catch-all rule. */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.  */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later,
 * the only problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be, that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare,
 * so far it was only observed in include/linux/telephony.h.
 *
 * _APP(T, L) copies the L-byte string T (plus the byte that follows it,
 * i.e. its terminating NUL) into the spare node `next_node`, makes that node
 * the current one (`cur_node`) and allocates a fresh spare whose `next`
 * points back at it.  Both `cur_node` and `next_node` must be in scope where
 * the macro is expanded.  L is evaluated twice, so it must be free of side
 * effects.
 */
#define _APP(T,L)	do {						   \
	cur_node = next_node;						   \
	next_node = xmalloc(sizeof(*next_node));			   \
	next_node->next = cur_node;					   \
	cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1);		   \
	cur_node->tag =							   \
	  find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ?		   \
	  SYM_ENUM_CONST : SYM_NORMAL;					   \
	cur_node->in_source_file = in_source_file;			   \
} while (0)

/* Append the token flex has just matched. */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.  */

/*
 * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an
 * IDENT. We need a hint from the parser to handle this accurately.
120 */ 121 bool dont_want_type_specifier; 122 123 int 124 yylex(void) 125 { 126 static enum { 127 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1, 128 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT, 129 } lexstate = ST_NOTSTARTED; 130 131 static int suppress_type_lookup, dont_want_brace_phrase; 132 static struct string_list *next_node; 133 static char *source_file; 134 135 int token, count = 0; 136 struct string_list *cur_node; 137 138 if (lexstate == ST_NOTSTARTED) 139 { 140 next_node = xmalloc(sizeof(*next_node)); 141 next_node->next = NULL; 142 lexstate = ST_NORMAL; 143 } 144 145 repeat: 146 token = yylex1(); 147 148 if (token == 0) 149 return 0; 150 else if (token == FILENAME) 151 { 152 char *file, *e; 153 154 /* Save the filename and line number for later error messages. */ 155 156 if (cur_filename) 157 free(cur_filename); 158 159 file = strchr(yytext, '\"')+1; 160 e = strchr(file, '\"'); 161 *e = '\0'; 162 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 163 cur_line = atoi(yytext+2); 164 165 if (!source_file) { 166 source_file = xstrdup(cur_filename); 167 in_source_file = 1; 168 } else { 169 in_source_file = (strcmp(cur_filename, source_file) == 0); 170 } 171 172 goto repeat; 173 } 174 175 switch (lexstate) 176 { 177 case ST_NORMAL: 178 switch (token) 179 { 180 case IDENT: 181 APP; 182 { 183 int r = is_reserved_word(yytext, yyleng); 184 if (r >= 0) 185 { 186 switch (token = r) 187 { 188 case ATTRIBUTE_KEYW: 189 lexstate = ST_ATTRIBUTE; 190 count = 0; 191 goto repeat; 192 case ASM_KEYW: 193 lexstate = ST_ASM; 194 count = 0; 195 goto repeat; 196 case TYPEOF_KEYW: 197 lexstate = ST_TYPEOF; 198 count = 0; 199 goto repeat; 200 201 case STRUCT_KEYW: 202 case UNION_KEYW: 203 case ENUM_KEYW: 204 dont_want_brace_phrase = 3; 205 suppress_type_lookup = 2; 206 goto fini; 207 208 case EXPORT_SYMBOL_KEYW: 209 goto fini; 210 211 case STATIC_ASSERT_KEYW: 212 lexstate = ST_STATIC_ASSERT; 213 count = 0; 214 goto repeat; 215 } 216 } 217 
if (!suppress_type_lookup && !dont_want_type_specifier) 218 { 219 if (find_symbol(yytext, SYM_TYPEDEF, 1)) 220 token = TYPE; 221 } 222 } 223 break; 224 225 case '[': 226 APP; 227 lexstate = ST_BRACKET; 228 count = 1; 229 goto repeat; 230 231 case '{': 232 APP; 233 if (dont_want_brace_phrase) 234 break; 235 lexstate = ST_BRACE; 236 count = 1; 237 goto repeat; 238 239 case '=': case ':': 240 APP; 241 lexstate = ST_EXPRESSION; 242 break; 243 244 default: 245 APP; 246 break; 247 } 248 break; 249 250 case ST_ATTRIBUTE: 251 APP; 252 switch (token) 253 { 254 case '(': 255 ++count; 256 goto repeat; 257 case ')': 258 if (--count == 0) 259 { 260 lexstate = ST_NORMAL; 261 token = ATTRIBUTE_PHRASE; 262 break; 263 } 264 goto repeat; 265 default: 266 goto repeat; 267 } 268 break; 269 270 case ST_ASM: 271 APP; 272 switch (token) 273 { 274 case '(': 275 ++count; 276 goto repeat; 277 case ')': 278 if (--count == 0) 279 { 280 lexstate = ST_NORMAL; 281 token = ASM_PHRASE; 282 break; 283 } 284 goto repeat; 285 default: 286 goto repeat; 287 } 288 break; 289 290 case ST_TYPEOF_1: 291 if (token == IDENT) 292 { 293 if (is_reserved_word(yytext, yyleng) >= 0 294 || find_symbol(yytext, SYM_TYPEDEF, 1)) 295 { 296 yyless(0); 297 unput('('); 298 lexstate = ST_NORMAL; 299 token = TYPEOF_KEYW; 300 break; 301 } 302 _APP("(", 1); 303 } 304 lexstate = ST_TYPEOF; 305 /* FALLTHRU */ 306 307 case ST_TYPEOF: 308 switch (token) 309 { 310 case '(': 311 if ( ++count == 1 ) 312 lexstate = ST_TYPEOF_1; 313 else 314 APP; 315 goto repeat; 316 case ')': 317 APP; 318 if (--count == 0) 319 { 320 lexstate = ST_NORMAL; 321 token = TYPEOF_PHRASE; 322 break; 323 } 324 goto repeat; 325 default: 326 APP; 327 goto repeat; 328 } 329 break; 330 331 case ST_BRACKET: 332 APP; 333 switch (token) 334 { 335 case '[': 336 ++count; 337 goto repeat; 338 case ']': 339 if (--count == 0) 340 { 341 lexstate = ST_NORMAL; 342 token = BRACKET_PHRASE; 343 break; 344 } 345 goto repeat; 346 default: 347 goto repeat; 348 } 349 break; 350 
351 case ST_BRACE: 352 APP; 353 switch (token) 354 { 355 case '{': 356 ++count; 357 goto repeat; 358 case '}': 359 if (--count == 0) 360 { 361 lexstate = ST_NORMAL; 362 token = BRACE_PHRASE; 363 break; 364 } 365 goto repeat; 366 default: 367 goto repeat; 368 } 369 break; 370 371 case ST_EXPRESSION: 372 switch (token) 373 { 374 case '(': case '[': case '{': 375 ++count; 376 APP; 377 goto repeat; 378 case '}': 379 /* is this the last line of an enum declaration? */ 380 if (count == 0) 381 { 382 /* Put back the token we just read so's we can find it again 383 after registering the expression. */ 384 unput(token); 385 386 lexstate = ST_NORMAL; 387 token = EXPRESSION_PHRASE; 388 break; 389 } 390 /* FALLTHRU */ 391 case ')': case ']': 392 --count; 393 APP; 394 goto repeat; 395 case ',': case ';': 396 if (count == 0) 397 { 398 /* Put back the token we just read so's we can find it again 399 after registering the expression. */ 400 unput(token); 401 402 lexstate = ST_NORMAL; 403 token = EXPRESSION_PHRASE; 404 break; 405 } 406 APP; 407 goto repeat; 408 default: 409 APP; 410 goto repeat; 411 } 412 break; 413 414 case ST_STATIC_ASSERT: 415 APP; 416 switch (token) 417 { 418 case '(': 419 ++count; 420 goto repeat; 421 case ')': 422 if (--count == 0) 423 { 424 lexstate = ST_NORMAL; 425 token = STATIC_ASSERT_PHRASE; 426 break; 427 } 428 goto repeat; 429 default: 430 goto repeat; 431 } 432 break; 433 434 default: 435 exit(1); 436 } 437 fini: 438 439 if (suppress_type_lookup > 0) 440 --suppress_type_lookup; 441 if (dont_want_brace_phrase > 0) 442 --dont_want_brace_phrase; 443 444 yylval = &next_node->next; 445 446 return token; 447 } 448