/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.
   YY_DECL renames the flex-generated scanner to the file-local
   yylex1(), leaving the public name yylex() for the second stage.  */
#define YY_DECL		static int yylex1(void)

%}

/* Identifiers: letters, digits, '_' and '$'; may not start with a digit.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal, optionally followed
   by a single U, L, UL or LU suffix (either case).  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fractional part with optional exponent,
   or a digit string with a mandatory exponent; optional F/L suffix.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally prefixed with L.  A
   backslash always consumes the character that follows it, so an
   escaped quote does not end the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Multi-character operators: any of ~ % ^ & * + = | < > / - followed
   by '=', plus && || -> << >>.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* Version 2 checksumming does proper tokenization; version 1 wasn't
   quite so pedantic.  The rules tagged <V2_TOKENS> below are active
   only while this start condition is selected.  */
%s V2_TOKENS

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  A line of
    the form `# <int> "<file>" ...' is handed to the second stage as
    FILENAME; any other line starting with '#', and every bare newline,
    only advances cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
<V2_TOKENS>{MC_TOKEN}			return OTHER;
<V2_TOKENS>{INT}			return INT;
<V2_TOKENS>{REAL}			return REAL;

"..."					return DOTS;

 /* All other tokens are single characters; the character itself is
    the token value.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.

   _APP(T,L) expects two variables named `cur_node' and `next_node' to be
   in scope at the point of expansion.  `next_node' is a pre-allocated
   placeholder: the macro turns it into the new list entry (`cur_node'),
   allocates a fresh placeholder whose `next' points back at that entry,
   and stores in the entry a heap copy of L+1 bytes starting at T
   (presumably the text plus its terminating NUL when called with
   yytext/yyleng), tagged SYM_NORMAL.  The list therefore grows with the
   newest entry closest to the placeholder.  */

#define _APP(T,L)	do {						   \
			  cur_node = next_node;				   \
			  next_node = xmalloc(sizeof(*next_node));	   \
			  next_node->next = cur_node;			   \
			  cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
			  cur_node->tag = SYM_NORMAL;			   \
			} while (0)

/* Append the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.
*/

/* yylex -- entry point called by the parser.

   Pulls raw tokens from the flex scanner yylex1() and post-processes
   them according to `lexstate':

     ST_NORMAL      Tokens are passed through one at a time.  Identifiers
                    are looked up as reserved words and, unless
                    suppressed, as typedef names (returned as TYPE).
     ST_ATTRIBUTE   Entered after the attribute keyword; swallows
     ST_ASM         everything up to the matching ')' and returns a
                    single ATTRIBUTE_PHRASE / ASM_PHRASE.
     ST_BRACKET     Entered on '[' / '{'; swallows everything up to the
     ST_BRACE       matching close and returns BRACKET_PHRASE /
                    BRACE_PHRASE.
     ST_EXPRESSION  Entered after '=' or ':'; swallows everything up to
                    a ',' or ';' at nesting depth zero, pushes that
                    terminator back, and returns EXPRESSION_PHRASE.

   Every token that is consumed (except the pushed-back terminator and
   FILENAME lines) is appended to the phrase list via APP.  On return,
   yylval holds the address of the placeholder's `next' link, i.e. the
   head of the nodes appended so far, newest first.

   FILENAME tokens never reach the parser: they update cur_filename and
   cur_line and scanning continues.

   Returns 0 at end of input, otherwise the (possibly re-categorized)
   token code.  */
int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
    ST_EXPRESSION
  } lexstate = ST_NOTSTARTED;

  /* Both flags are countdowns, decremented once per returned token at
     `fini'.  suppress_type_lookup disables the typedef lookup for the
     token following struct/union/enum; dont_want_brace_phrase makes a
     '{' among the next two tokens after struct/union come back as a
     plain '{' instead of being collapsed into a BRACE_PHRASE.  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  static struct string_list *next_node;

  /* `count' is the nesting depth for the phrase-collecting states.  It
     restarts at zero on every call; each of those states runs to
     completion inside a single call via `goto repeat'.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      /* First call: select version-2 tokenization and allocate the
         initial list placeholder.  */
      BEGIN(V2_TOKENS);
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      /* The FILENAME rule guarantees an opening and a closing quote,
         so neither strchr can fail here.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);

      /* yytext+2 skips the '#' and the first blank; atoi skips any
         further blanks before the number.  */
      cur_line = atoi(yytext+2);

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
        {
        case IDENT:
          APP;
          {
            const struct resword *r = is_reserved_word(yytext, yyleng);
            if (r)
              {
                switch (token = r->token)
                  {
                  case ATTRIBUTE_KEYW:
                    lexstate = ST_ATTRIBUTE;
                    count = 0;
                    goto repeat;
                  case ASM_KEYW:
                    lexstate = ST_ASM;
                    count = 0;
                    goto repeat;

                  case STRUCT_KEYW:
                  case UNION_KEYW:
                    dont_want_brace_phrase = 3;
                    /* FALLTHRU */
                  case ENUM_KEYW:
                    suppress_type_lookup = 2;
                    goto fini;

                  case EXPORT_SYMBOL_KEYW:
                    goto fini;

                  default:
                    /* Any other reserved word keeps its keyword token
                       but still goes through the typedef lookup
                       below.  */
                    break;
                  }
              }
            if (!suppress_type_lookup)
              {
                struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
                if (sym && sym->type == SYM_TYPEDEF)
                  token = TYPE;
              }
          }
          break;

        case '[':
          APP;
          lexstate = ST_BRACKET;
          count = 1;
          goto repeat;

        case '{':
          APP;
          if (dont_want_brace_phrase)
            break;
          lexstate = ST_BRACE;
          count = 1;
          goto repeat;

        case '=': case ':':
          APP;
          /* The '=' / ':' itself is returned now; the expression that
             follows is collected on the next call.  */
          lexstate = ST_EXPRESSION;
          break;

        case DOTS:
        default:
          APP;
          break;
        }
      break;

    case ST_ATTRIBUTE:
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ATTRIBUTE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_ASM:
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ASM_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACKET:
      APP;
      switch (token)
        {
        case '[':
          ++count;
          goto repeat;
        case ']':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACKET_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACE:
      APP;
      switch (token)
        {
        case '{':
          ++count;
          goto repeat;
        case '}':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_EXPRESSION:
      switch (token)
        {
        case '(': case '[': case '{':
          ++count;
          APP;
          goto repeat;
        case ')': case ']': case '}':
          --count;
          APP;
          goto repeat;
        case ',': case ';':
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  It is deliberately
                 not appended here; it will be appended when it is read
                 again in ST_NORMAL.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          APP;
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    default:
      /* Not reachable with the states defined above.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}