/* xref: /linux/scripts/genksyms/lex.l (revision d524dac9279b6a41ffdf7ff7958c577f2e387db6) */
/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.

   YY_DECL gives the flex-generated scanner the file-local name yylex1(),
   leaving the name yylex() free for the second stage defined in the
   user-code section at the end of this file.  */
#define YY_DECL		static int yylex1(void)

%}

/* Identifiers; '$' is accepted as an identifier character.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal, optionally followed
   by one of the suffixes U, L, UL or LU (either case).  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: a fraction with optional exponent, or digits with
   a mandatory exponent; an F or L suffix may follow.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, with an optional L prefix.  A backslash
   always consumes the character that follows it.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Multi-character operator tokens.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* Version 2 checksumming does proper tokenization; version 1 wasn't
   quite so pedantic.  */
%s V2_TOKENS

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  A
    preprocessor line marker (# <number> "<file>" ...) is handed to the
    second stage as FILENAME, which resets cur_line from it; any other
    line starting with '#' is skipped and counted as one line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  These rules are active only in the
    V2_TOKENS start condition.  */
<V2_TOKENS>{MC_TOKEN}			return OTHER;
<V2_TOKENS>{INT}			return INT;
<V2_TOKENS>{REAL}			return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

99 /* Bring in the keyword recognizer.  */
100 
101 #include "keywords.c"
102 
103 
/* Macros to append to our phrase collection list.

   _APP(T, L) expects `cur_node' and `next_node' (struct string_list *)
   to be in scope.  The current placeholder `next_node' becomes the node
   holding a fresh copy of the L+1 bytes at T (so T[L] is expected to be
   the terminating NUL), tagged SYM_NORMAL, and a new placeholder is
   allocated whose `next' points back at it.  Both arguments are
   parenthesized so that expression arguments expand correctly; L is
   evaluated twice and must be free of side effects.  */

#define _APP(T,L)	do {						      \
			  cur_node = next_node;				      \
			  next_node = xmalloc(sizeof(*next_node));	      \
			  next_node->next = cur_node;			      \
			  cur_node->string = memcpy(xmalloc((L) + 1), (T),    \
						    (L) + 1);		      \
			  cur_node->tag = SYM_NORMAL;			      \
			} while (0)

/* Append the text of the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)


117 /* The second stage lexer.  Here we incorporate knowledge of the state
118    of the parser to tailor the tokens that are returned.  */
119 
120 int
121 yylex(void)
122 {
123   static enum {
124     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
125     ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
126     ST_TABLE_5, ST_TABLE_6
127   } lexstate = ST_NOTSTARTED;
128 
129   static int suppress_type_lookup, dont_want_brace_phrase;
130   static struct string_list *next_node;
131 
132   int token, count = 0;
133   struct string_list *cur_node;
134 
135   if (lexstate == ST_NOTSTARTED)
136     {
137       BEGIN(V2_TOKENS);
138       next_node = xmalloc(sizeof(*next_node));
139       next_node->next = NULL;
140       lexstate = ST_NORMAL;
141     }
142 
143 repeat:
144   token = yylex1();
145 
146   if (token == 0)
147     return 0;
148   else if (token == FILENAME)
149     {
150       char *file, *e;
151 
152       /* Save the filename and line number for later error messages.  */
153 
154       if (cur_filename)
155 	free(cur_filename);
156 
157       file = strchr(yytext, '\"')+1;
158       e = strchr(file, '\"');
159       *e = '\0';
160       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
161       cur_line = atoi(yytext+2);
162 
163       goto repeat;
164     }
165 
166   switch (lexstate)
167     {
168     case ST_NORMAL:
169       switch (token)
170 	{
171 	case IDENT:
172 	  APP;
173 	  {
174 	    const struct resword *r = is_reserved_word(yytext, yyleng);
175 	    if (r)
176 	      {
177 		switch (token = r->token)
178 		  {
179 		  case ATTRIBUTE_KEYW:
180 		    lexstate = ST_ATTRIBUTE;
181 		    count = 0;
182 		    goto repeat;
183 		  case ASM_KEYW:
184 		    lexstate = ST_ASM;
185 		    count = 0;
186 		    goto repeat;
187 
188 		  case STRUCT_KEYW:
189 		  case UNION_KEYW:
190 		    dont_want_brace_phrase = 3;
191 		  case ENUM_KEYW:
192 		    suppress_type_lookup = 2;
193 		    goto fini;
194 
195 		  case EXPORT_SYMBOL_KEYW:
196 		      goto fini;
197 		  }
198 	      }
199 	    if (!suppress_type_lookup)
200 	      {
201 		struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
202 		if (sym && sym->type == SYM_TYPEDEF)
203 		  token = TYPE;
204 	      }
205 	  }
206 	  break;
207 
208 	case '[':
209 	  APP;
210 	  lexstate = ST_BRACKET;
211 	  count = 1;
212 	  goto repeat;
213 
214 	case '{':
215 	  APP;
216 	  if (dont_want_brace_phrase)
217 	    break;
218 	  lexstate = ST_BRACE;
219 	  count = 1;
220 	  goto repeat;
221 
222 	case '=': case ':':
223 	  APP;
224 	  lexstate = ST_EXPRESSION;
225 	  break;
226 
227 	case DOTS:
228 	default:
229 	  APP;
230 	  break;
231 	}
232       break;
233 
234     case ST_ATTRIBUTE:
235       APP;
236       switch (token)
237 	{
238 	case '(':
239 	  ++count;
240 	  goto repeat;
241 	case ')':
242 	  if (--count == 0)
243 	    {
244 	      lexstate = ST_NORMAL;
245 	      token = ATTRIBUTE_PHRASE;
246 	      break;
247 	    }
248 	  goto repeat;
249 	default:
250 	  goto repeat;
251 	}
252       break;
253 
254     case ST_ASM:
255       APP;
256       switch (token)
257 	{
258 	case '(':
259 	  ++count;
260 	  goto repeat;
261 	case ')':
262 	  if (--count == 0)
263 	    {
264 	      lexstate = ST_NORMAL;
265 	      token = ASM_PHRASE;
266 	      break;
267 	    }
268 	  goto repeat;
269 	default:
270 	  goto repeat;
271 	}
272       break;
273 
274     case ST_BRACKET:
275       APP;
276       switch (token)
277 	{
278 	case '[':
279 	  ++count;
280 	  goto repeat;
281 	case ']':
282 	  if (--count == 0)
283 	    {
284 	      lexstate = ST_NORMAL;
285 	      token = BRACKET_PHRASE;
286 	      break;
287 	    }
288 	  goto repeat;
289 	default:
290 	  goto repeat;
291 	}
292       break;
293 
294     case ST_BRACE:
295       APP;
296       switch (token)
297 	{
298 	case '{':
299 	  ++count;
300 	  goto repeat;
301 	case '}':
302 	  if (--count == 0)
303 	    {
304 	      lexstate = ST_NORMAL;
305 	      token = BRACE_PHRASE;
306 	      break;
307 	    }
308 	  goto repeat;
309 	default:
310 	  goto repeat;
311 	}
312       break;
313 
314     case ST_EXPRESSION:
315       switch (token)
316 	{
317 	case '(': case '[': case '{':
318 	  ++count;
319 	  APP;
320 	  goto repeat;
321 	case ')': case ']': case '}':
322 	  --count;
323 	  APP;
324 	  goto repeat;
325 	case ',': case ';':
326 	  if (count == 0)
327 	    {
328 	      /* Put back the token we just read so's we can find it again
329 		 after registering the expression.  */
330 	      unput(token);
331 
332 	      lexstate = ST_NORMAL;
333 	      token = EXPRESSION_PHRASE;
334 	      break;
335 	    }
336 	  APP;
337 	  goto repeat;
338 	default:
339 	  APP;
340 	  goto repeat;
341 	}
342       break;
343 
344     case ST_TABLE_1:
345       goto repeat;
346 
347     case ST_TABLE_2:
348       if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
349 	{
350 	  token = EXPORT_SYMBOL_KEYW;
351 	  lexstate = ST_TABLE_5;
352 	  APP;
353 	  break;
354 	}
355       lexstate = ST_TABLE_6;
356       /* FALLTHRU */
357 
358     case ST_TABLE_6:
359       switch (token)
360 	{
361 	case '{': case '[': case '(':
362 	  ++count;
363 	  break;
364 	case '}': case ']': case ')':
365 	  --count;
366 	  break;
367 	case ',':
368 	  if (count == 0)
369 	    lexstate = ST_TABLE_2;
370 	  break;
371 	};
372       goto repeat;
373 
374     case ST_TABLE_3:
375       goto repeat;
376 
377     case ST_TABLE_4:
378       if (token == ';')
379 	lexstate = ST_NORMAL;
380       goto repeat;
381 
382     case ST_TABLE_5:
383       switch (token)
384 	{
385 	case ',':
386 	  token = ';';
387 	  lexstate = ST_TABLE_2;
388 	  APP;
389 	  break;
390 	default:
391 	  APP;
392 	  break;
393 	}
394       break;
395 
396     default:
397       exit(1);
398     }
399 fini:
400 
401   if (suppress_type_lookup > 0)
402     --suppress_type_lookup;
403   if (dont_want_brace_phrase > 0)
404     --dont_want_brace_phrase;
405 
406   yylval = &next_node->next;
407 
408   return token;
409 }
410