/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL         static int yylex1(void)

%}

IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*

O_INT                   0[0-7]*
D_INT                   [1-9][0-9]*
X_INT                   0[Xx][0-9A-Fa-f]+
I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?

FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP                     [Ee][+-]?[0-9]+
F_SUF                   [FfLl]
REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  A line
    marker of the form `# <int> "<file>" ...' is handed to the second
    stage as FILENAME; any other `#' line and every bare newline just
    advance the line counter.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
^#.*\n                                  cur_line++;
\n                                      cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+                            ;


{STRING}                                return STRING;
{CHAR}                                  return CHAR;
{IDENT}                                 return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}                              return OTHER;
{INT}                                   return INT;
{REAL}                                  return REAL;

"..."                                   return DOTS;

 /* All other tokens are single characters.  */
.                                       return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.hash.c"


/* Macros to append to our phrase collection list.  */

/*
 * _APP(T, L) appends a copy of the L-character, NUL-terminated text T to
 * the token list.  It relies on the locals `cur_node' and `next_node' of
 * the enclosing function: the current placeholder `next_node' becomes the
 * new node, and a fresh placeholder is allocated whose `next' points at
 * it, so the list is linked from the newest token back to the oldest.
 *
 * We mark any token, that equals to a known enumerator, as
 * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
 * the only problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be, that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare,
 * so far it was only observed in include/linux/telephony.h.
 */
#define _APP(T,L)       do {                                               \
                          cur_node = next_node;                            \
                          next_node = xmalloc(sizeof(*next_node));         \
                          next_node->next = cur_node;                      \
                          cur_node->string =                               \
                            memcpy(xmalloc((L)+1), (T), (L)+1);            \
                          cur_node->tag =                                  \
                            find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
                            SYM_ENUM_CONST : SYM_NORMAL ;                  \
                        } while (0)

/* Append the text of the token flex has just matched.  */
#define APP             _APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Each call pulls basic tokens from yylex1() until it has one token to
   hand back:

     - FILENAME tokens are consumed here; they update cur_filename and
       cur_line and are never returned.
     - After an attribute or asm keyword, everything up to the matching
       ')' is collected and returned as a single ATTRIBUTE_PHRASE or
       ASM_PHRASE.
     - A '[' ... ']' run is returned as BRACKET_PHRASE and, unless
       dont_want_brace_phrase is non-zero, a '{' ... '}' run is returned
       as BRACE_PHRASE.
     - After '=' or ':' (which are returned as themselves) the following
       call collects tokens up to an unnested ',', ';' or '}' and returns
       them as EXPRESSION_PHRASE; the terminator is pushed back with
       unput() so it is seen again in ST_NORMAL.
     - An identifier that is a reserved word yields that word's token;
       otherwise, if type lookup is not suppressed and the identifier is
       a known SYM_TYPEDEF, TYPE is returned instead of IDENT.

   The text of every collected token is appended to the string list via
   APP, and yylval is set to the address of the `next' field of the
   placeholder node, i.e. to a pointer to the most recently appended
   node.  Returns 0 at end of input.

   The former ST_TABLE_* states were removed: lexstate is local to this
   function and nothing outside those states ever selected one of them,
   so they could not be reached.  */

int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
    ST_EXPRESSION
  } lexstate = ST_NOTSTARTED;

  /* Both are countdowns decremented once per returned token at `fini'.  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  /* Empty placeholder at the head of the token list; see _APP.  */
  static struct string_list *next_node;

  /* `count' is the bracket nesting depth of the phrase being collected.
     It is per-call: a phrase is always completed within a single call.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed on the first use.  */
      free(cur_filename);

      /* The FILENAME pattern guarantees an opening and a closing quote,
         so neither strchr() can fail here.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* Skip the '#' and the first blank; atoi() skips any further
         leading whitespace before the number.  */
      cur_line = atoi(yytext+2);

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
        {
        case IDENT:
          APP;
          {
            const struct resword *r = is_reserved_word(yytext, yyleng);
            if (r)
              {
                switch (token = r->token)
                  {
                  case ATTRIBUTE_KEYW:
                    lexstate = ST_ATTRIBUTE;
                    count = 0;
                    goto repeat;
                  case ASM_KEYW:
                    lexstate = ST_ASM;
                    count = 0;
                    goto repeat;

                  case STRUCT_KEYW:
                  case UNION_KEYW:
                  case ENUM_KEYW:
                    /* After the decrement at `fini' these leave 2 and 1:
                       the tag that follows is not looked up as a typedef,
                       and a '{' among the next two tokens is returned
                       as-is rather than opening a BRACE_PHRASE.  */
                    dont_want_brace_phrase = 3;
                    suppress_type_lookup = 2;
                    goto fini;

                  case EXPORT_SYMBOL_KEYW:
                    goto fini;

                  default:
                    /* Any other keyword keeps its own token, subject to
                       the typedef lookup below.  */
                    break;
                  }
              }
            if (!suppress_type_lookup)
              {
                if (find_symbol(yytext, SYM_TYPEDEF, 1))
                  token = TYPE;
              }
          }
          break;

        case '[':
          APP;
          lexstate = ST_BRACKET;
          count = 1;
          goto repeat;

        case '{':
          APP;
          if (dont_want_brace_phrase)
            break;
          lexstate = ST_BRACE;
          count = 1;
          goto repeat;

        case '=': case ':':
          APP;
          lexstate = ST_EXPRESSION;
          break;

        case DOTS:
        default:
          APP;
          break;
        }
      break;

    case ST_ATTRIBUTE:
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ATTRIBUTE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_ASM:
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ASM_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACKET:
      APP;
      switch (token)
        {
        case '[':
          ++count;
          goto repeat;
        case ']':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACKET_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACE:
      APP;
      switch (token)
        {
        case '{':
          ++count;
          goto repeat;
        case '}':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_EXPRESSION:
      switch (token)
        {
        case '(': case '[': case '{':
          ++count;
          APP;
          goto repeat;
        case '}':
          /* is this the last line of an enum declaration? */
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          /* FALLTHRU */
        case ')': case ']':
          --count;
          APP;
          goto repeat;
        case ',': case ';':
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          APP;
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    default:
      /* ST_NOTSTARTED cannot occur here; treat anything else as fatal.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}