xref: /illumos-gate/usr/src/tools/cscope-fast/scanner.l (revision b1e2e3fb17324e9ddf43db264a0c64da7756d9e6)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  *
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  *	cscope - interactive C symbol cross-reference
32  *
33  *
34  *	C symbol scanner
35  */
36 #ident	"@(#)scanner.l	1.2	93/06/07 SMI"
37 #include "global.h"
38 
39 /* the line counting has been moved from character reading for speed */
40 /* comments are discarded */
41 #undef	input
42 #define	input() \
43 	((yytchar = (yytchar = yysptr > yysbuf ? \
44 	    *--yysptr : getc(yyin)) == '/' ? comment() : yytchar) == \
45 	    EOF ? 0 : toascii(yytchar))
46 #define	noncommentinput() \
47 	((yytchar = yysptr > yysbuf ? *--yysptr : getc(yyin)) == \
48 	    EOF ? 0 : yytchar)
49 #undef	unput
50 #define	unput(c) (*yysptr++ = (c))
51 
52 /* not a preprocessor line (allow Ingres(TM) "## char var;" lines) */
53 #define	notpp()	(ppdefine == NO && (*yytext != '#' || yytext[1] == '#'))
54 
55 #define	IFLEVELINC	5	/* #if nesting level size increment */
56 
57 /* keyword text for fast testing of keywords in the scanner */
58 extern	char	externtext[];
59 extern	char	typedeftext[];
60 
61 int	first;	/* buffer index for first char of symbol */
62 int	last;	/* buffer index for last char of symbol */
63 int	lineno;	/* symbol line number */
64 
65 static	BOOL	arraydimension;		/* inside array dimension declaration */
66 static	BOOL	bplisting;		/* breakpoint listing */
67 static	int	braces;			/* unmatched left brace count */
68 static	int	cesudeftoken;		/* class/enum/struct/union definition */
69 static	BOOL	classdef;		/* c++ class definition */
70 static	BOOL	elseelif;		/* #else or #elif found */
71 static	BOOL	esudef;			/* enum/struct/union definition */
72 static	int	esubraces;		/* outermost enum/struct/union */
73 					/* brace count */
74 static	BOOL	externdec;		/* extern declaration */
75 static	BOOL	fcndef;			/* function definition */
76 static	BOOL	globalscope;		/* file global scope */
77 					/* (outside functions) */
78 static	int	iflevel;		/* #if nesting level */
79 static	BOOL	initializer;		/* data initializer */
80 static	int	initializerbraces;	/* data initializer outer brace count */
81 static	BOOL	lex;			/* lex file */
82 static	BOOL	localdef;		/* function/block local definition */
83 static	int	miflevel = IFLEVELINC;	/* maximum #if nesting level */
84 static	int	*maxifbraces;		/* maximum brace count within #if */
85 static	int	*preifbraces;		/* brace count before #if */
86 static	int	parens;			/* unmatched left parenthesis count */
87 static	BOOL	ppdefine;		/* preprocessor define statement */
88 static	BOOL	psuedoelif;		/* psuedo-#elif */
89 static	BOOL	oldtype;		/* next identifier is an old type */
90 static	BOOL	rules;			/* lex/yacc rules */
91 static	BOOL	sdl;			/* SDL file */
92 static	BOOL	structfield;		/* structure field declaration */
93 static	BOOL	template;		/* function template */
94 static	int	templateparens;	/* function template outer parentheses count */
95 static	BOOL	typedefdef;	/* typedef name definition */
96 static	BOOL	typedefname;	/* typedef name use */
97 static	int	token;		/* token found */
98 
99 static	BOOL	asy;			/* assembly file */
100 
101 void multicharconstant(char terminator);
102 int do_assembly(int token);
103 %}
104 identifier	[a-zA-Z_][a-zA-Z_0-9]*
105 number		\.?[0-9][.0-9a-fA-FlLuUxX]*
106 %start SDL
107 %a 6000
108 %o 11000
109 %p 3000
110 %%
111 %\{		{	/* lex/yacc C declarations/definitions */
112 			globalscope = YES;
113 			goto more;
114 			/* NOTREACHED */
115 		}
116 %\}		{
117 			globalscope = NO;
118 			goto more;
119 			/* NOTREACHED */
120 		}
121 ^%%		{	/* lex/yacc rules delimiter */
122 			braces = 0;
123 			if (rules == NO) {
124 				rules = YES;
125 
126 				/* simulate a yylex() or yyparse() definition */
127 				(void) strcat(yytext, " /* ");
128 				first = strlen(yytext);
129 				if (lex == YES) {
130 					(void) strcat(yytext, "yylex");
131 				} else {
132 					/*
133 					 * yacc: yyparse implicitly calls yylex
134 					 */
135 					char *s = " yylex()";
136 					char *cp = s + strlen(s);
137 					while (--cp >= s) {
138 						unput(*cp);
139 					}
140 					(void) strcat(yytext, "yyparse");
141 				}
142 				last = strlen(yytext);
143 				(void) strcat(yytext, " */");
144 				yyleng = strlen(yytext);
145 				yymore();
146 				return (FCNDEF);
147 			} else {
148 				rules = NO;
149 				globalscope = YES;
150 				last = first;
151 				yymore();
152 				return (FCNEND);
153 			}
154 			/* NOTREACHED */
155 		}
156 <SDL>(PROCEDURE|STATE)[ \t]+({identifier}|\*)	{ /* SDL procedure or state */
157 			braces = 1;
158 			fcndef = YES;	/* treat as function definition */
159 			token = FCNDEF;
160 			globalscope = NO;
161 			goto findident;
162 			/* NOTREACHED */
163 		}
164 <SDL>(CALL|NEXTSTATE)[ \t]+({identifier}|\*)	{ /* SDL call or nextstate */
165 			token = FCNCALL;
166 			goto findident;	/* treat as function call */
167 			/* NOTREACHED */
168 		}
169 <SDL>END(PROCEDURE|STATE)[ \t]+({identifier}|\*)	{
170 			/* end of an SDL procedure or state */
171 			goto endstate;	/* treat as the end of a function */
172 			/* NOTREACHED */
173 		}
174 \{		{
175 			/* count unmatched left braces for fcn def detection */
176 			++braces;
177 
178 			/*
179 			 * mark an untagged enum/struct/union so its beginning
180 			 * can be found
181 			 */
182 			if (cesudeftoken) {
183 				last = first;
184 				savesymbol(cesudeftoken);
185 				cesudeftoken = '\0';
186 			}
187 			goto more;
188 			/* NOTREACHED */
189 		}
\#[ \t]*endif/.*[\n\r][ \t\n\r]*#[ \t]*if	{
			/*
			 * An #endif whose next directive is another #if.
			 * Without special handling the brace count goes wrong
			 * for input such as:
			 *
			 * #if ...
			 * 	... {
			 * #endif
			 * #if ...
			 * 	... {
			 * #endif
			 */
			if (elseelif != YES) {
				/*
				 * no #else or #elif was seen for the current
				 * #if: flag the upcoming #if so that it is
				 * processed as an #elif instead
				 */
				psuedoelif = YES;
				goto more;
			}
			/* otherwise this is handled as an ordinary #endif */
			goto endif;
			/* NOTREACHED */
		}
\#[ \t]*ifn?(def)?	{ /* #if, #ifdef or #ifndef */
			/* a newly opened conditional has no #else/#elif yet */
			elseelif = NO;

			/* an #endif/#if pair was flagged: act like an #elif */
			if (psuedoelif == YES) {
				psuedoelif = NO;
				goto elif;
			}
			/*
			 * both brace stacks are full: enlarge them by
			 * IFLEVELINC entries before pushing
			 */
			if (iflevel == miflevel) {
				miflevel += IFLEVELINC;
				maxifbraces = myrealloc(maxifbraces,
				    miflevel * sizeof (int));
				preifbraces = myrealloc(preifbraces,
				    miflevel * sizeof (int));
			}
			/*
			 * push: remember the brace count in effect before
			 * the #if and start its maximum at zero
			 */
			preifbraces[iflevel] = braces;
			maxifbraces[iflevel] = 0;
			++iflevel;
			goto more;
			/* NOTREACHED */
		}
\#[ \t]*el(se|if)	{ /* #elif or #else */
			elseelif = YES;
		elif:
			/*
			 * Reached directly for #else/#elif, and by "goto elif"
			 * from the #if rule when an #endif/#if pair is being
			 * treated as an #elif.
			 *
			 * The entry for the innermost open #if lives at index
			 * iflevel - 1; index iflevel is one past the top of
			 * the stack and must not be read.
			 */
			if (iflevel > 0) {

				/* save the maximum brace count for this #if */
				if (braces > maxifbraces[iflevel - 1]) {
					maxifbraces[iflevel - 1] = braces;
				}
				/* restore the brace count to before the #if */
				braces = preifbraces[iflevel - 1];
			}
			goto more;
			/* NOTREACHED */
		}
\#[ \t]*endif	{	/* #endif */
		endif:
			/* an unbalanced #endif (no open #if) is ignored */
			if (iflevel > 0) {

				/* pop the entry of the #if being closed */
				--iflevel;

				/*
				 * continue with the largest brace count seen
				 * in any section of this #if
				 */
				if (maxifbraces[iflevel] > braces) {
					braces = maxifbraces[iflevel];
				}
			}
			goto more;
			/* NOTREACHED */
		}
258 \}		{
259 			/* could be the last enum member initializer */
260 			if (braces == initializerbraces) {
261 				initializerbraces = -1;
262 				initializer = NO;
263 			}
264 			if (--braces <= 0) {
265 		endstate:
266 				braces = 0;
267 				classdef = NO;
268 			}
269 			/*
270 			 * if the end of an outermost enum/struct/union
271 			 * definition
272 			 */
273 			if (esudef == YES && braces == esubraces) {
274 				esudef = NO;
275 				esubraces = -1;
276 				last = first;
277 				yymore();
278 				return (ESUEND);
279 			}
280 			/* if the end of a function */
281 			if ((braces == 0 || braces == 1 && classdef == YES) &&
282 			    fcndef == YES) {
283 				fcndef = NO;
284 				globalscope = YES;
285 				last = first;
286 				yymore();
287 				return (FCNEND);
288 			}
289 			goto more;
290 			/* NOTREACHED */
291 		}
292 \(		{
293 			/*
294 			 * count unmatched left parentheses for function
295 			 * templates
296 			 */
297 			++parens;
298 			goto more;
299 			/* NOTREACHED */
300 		}
301 \)		{
302 			if (--parens <= 0) {
303 				parens = 0;
304 			}
305 			/* if the end of a function template */
306 			if (parens == templateparens) {
307 				templateparens = -1;
308 				template = NO;
309 			}
310 			goto more;
311 			/* NOTREACHED */
312 		}
313 =		{	/* if a global definition initializer */
314 			if ((globalscope == YES || localdef == YES) &&
315 			    notpp()) {
316 				initializerbraces = braces;
317 				initializer = YES;
318 			}
319 			goto more;
320 			/* NOTREACHED */
321 		}
322 :		{	/* if a structure field */
323 			/* note: a pr header has a colon in the date */
324 			if (esudef == YES && notpp()) {
325 				structfield = YES;
326 			}
327 			goto more;
328 			/* NOTREACHED */
329 		}
330 \,		{
331 			if (braces == initializerbraces) {
332 				initializerbraces = -1;
333 				initializer = NO;
334 			}
335 			structfield = NO;
336 			goto more;
337 			/* NOTREACHED */
338 		}
339 "##"		|	/* start of Ingres(TM) code line */
340 ;		{
341 			/* if not in an enum/struct/union declaration */
342 			if (esudef == NO) {
343 				externdec = NO;
344 				typedefdef = NO;
345 				localdef = NO;
346 			}
347 			structfield = NO;
348 			initializer = NO;
349 			oldtype = NO;
350 			goto more;
351 			/* NOTREACHED */
352 		}
353 \#[ \t]*define[ \t]+{identifier}	{
354 
355 			/* preprocessor macro or constant definition */
356 			ppdefine = YES;
357 			token = DEFINE;
358 			if (compress == YES) {
359 				/* compress the keyword */
360 				yytext[0] = '\7';
361 			}
362 		findident:
363 			first = yyleng - 1;
364 			while (isalnum(yytext[first]) || yytext[first] == '_') {
365 				--first;
366 			}
367 			++first;
368 			goto iflongline;
369 			/* NOTREACHED */
370 		}
371 class[ \t]+{identifier}[ \t\n\ra-zA-Z0-9_():]*\{	{
372 			/* class definition */
373 			classdef = YES;
374 			cesudeftoken = 'c';
375 			REJECT;
376 			/* NOTREACHED */
377 		}
378 (enum|struct|union)/([ \t\n\r]+{identifier})?[ \t\n\r]*\{	{
379 			/* enum/struct/union definition */
380 			esudef = YES;
381 			if (esubraces < 0) {
382 				/* if outermost enum/struct/union */
383 				esubraces = braces;
384 			}
385 			cesudeftoken = *(yytext + first);
386 			goto iflongline;
387 			/* NOTREACHED */
388 		}
389 {identifier}/[ \t]*\(([ \t\n\ra-zA-Z0-9_*&[\]=,.]*|\([ \ta-zA-Z0-9_*[\],]*\))*\)[ \t\n\r()]*[:a-zA-Z_#{]	{
390 
391 			/*
392 			 * warning: "if (...)" must not overflow yytext, so
393 			 * the content of function argument definitions is
394 			 * restricted, in particular parentheses are
395 			 * not allowed
396 			 */
397 
398 			if (asy) {
399 				/*
400 				 * In assembly files, if it looks like
401 				 * a definition, pass it down as one and we'll
402 				 * take care of it later.
403 				 */
404 				token = FCNDEF;
405 				goto iflongline;
406 			}
407 
408 			/* if a function definition */
409 			/*
410 			 * note: "#define a (b) {" and "#if defined(a)\n#"
411 			 * are not
412 			 */
413 			if (braces == 0 && notpp() && rules == NO ||
414 			    braces == 1 && classdef == YES) {
415 				fcndef = YES;
416 				token = FCNDEF;
417 				globalscope = NO;
418 				goto iflongline;
419 			}
420 			goto iffcncall;
421 			/* NOTREACHED */
422 		}
423 {identifier}/[ \t]*\(	{
424 			if (asy) {
425 				/*
426 				 * Macro calls can get here if they have
427 				 * arguments which contain %'s (i.e.,
428 				 * registers).
429 				 */
430 				token = FCNDEF;
431 				goto iflongline;
432 			}
433 
434 			/* if a function call */
435 		iffcncall:
436 			if ((fcndef == YES || ppdefine == YES ||
437 			    rules == YES) && externdec == NO &&
438 			    (localdef == NO || initializer == YES)) {
439 				token = FCNCALL;
440 				goto iflongline;
441 			}
442 			if (template == NO && typedefdef == NO) {
443 				templateparens = parens;
444 				template = YES;
445 			}
446 			token = IDENT;
447 			goto iflongline;
448 			/* NOTREACHED */
449 		}
450 (\+\+|--)[ \t]*{identifier}	{	/* prefix increment or decrement */
451 			token = ASSIGNMENT;
452 			goto findident;
453 			/* NOTREACHED */
454 		}
455 {identifier}/[ \t]*(\+\+|--)	{	/* postfix increment or decrement */
456 			token = ASSIGNMENT;
457 			goto iflongline;
458 			/* NOTREACHED */
459 		}
460 \*[ \t]*{identifier}/[ \t]*[^a-zA-Z0-9_(+-][^+-]	{
461 			/* indirect assignment or dcl */
462 			while (!isalnum(yytext[first]) &&
463 			    yytext[first] != '_') {
464 				++first;
465 			}
466 			goto ident;
467 			/* NOTREACHED */
468 		}
469 {identifier}/[ \t\n\r]*(=[^=]|[-+*/%&^|]=|<<=|>>=)	{ /* assignment */
470 			if ((fcndef == YES || ppdefine == YES ||
471 			    rules == YES) && localdef == NO) {
472 				token = ASSIGNMENT;
473 				goto iflongline;
474 			}
475 			goto ident;
476 			/* NOTREACHED */
477 		}
478 {identifier}/[* \t\n\r]+[a-zA-Z0-9_]	{	/* possible typedef name use */
479 			if (notpp() && esudef == NO && fcndef == YES &&
480 			    typedefdef == NO && parens == 0) {
481 				char	c, *s = yytext + first - 1;
482 
483 				while (--s >= yytext && (c = *s) != ';' &&
484 				    c != '{') {
485 					if (!isspace(c) && !isalpha(c)) {
486 						goto nottypedefname;
487 					}
488 				}
489 				typedefname = YES;
490 			}
491 		nottypedefname:
492 			/* skip the global/parameter/local tests */
493 			token = IDENT;
494 			goto iflongline;
495 			/* NOTREACHED */
496 		}
497 {identifier}	{
498 			struct	keystruct *p;
499 			char	*s;
500 
501 		ident:	token = IDENT;
502 			if (notpp() && externdec == NO &&
503 			    arraydimension == NO && initializer == NO) {
504 
505 				/* if an enum/struct/union member definition */
506 				if (esudef == YES) {
507 					if (structfield == NO) {
508 						token = MEMBERDEF;
509 					}
510 				} else if (typedefdef == YES && oldtype == NO) {
511 					/* if a typedef name */
512 					token = TYPEDEF;
513 				} else if (globalscope == YES &&
514 				    template == NO && oldtype == NO) {
515 					/* if a global definition */
516 					token = GLOBALDEF;
517 				} else if (fcndef == YES && braces == 0) {
518 					/* if a function parameter definition */
519 					token = PARAMETER;
520 				} else if (localdef == YES) {
521 					/* if a local definition */
522 					token = LOCALDEF;
523 				}
524 			}
525 		iflongline:
526 			/* if a long line */
527 			if (yyleng > STMTMAX) {
528 				int	c;
529 
530 				/* skip to the end of the line */
531 				warning("line too long");
532 				while ((c = input()) != LEXEOF) {
533 					if (c == '\n') {
534 						unput(c);
535 						break;
536 					}
537 				}
538 			}
539 			/* truncate a long symbol */
540 			if (yyleng - first > PATLEN) {
541 				warning("symbol too long");
542 				yyleng = first + PATLEN;
543 				yytext[yyleng] = '\0';
544 			}
545 
546 			yymore();
547 
548 			if (asy) {
549 				int t;
550 
551 				last = yyleng;
552 				t = do_assembly(token);
553 				if (t >= 0) {
554 					token = t;
555 					return (token);
556 				}
557 
558 				goto end;
559 			}
560 
561 			/* if a keyword */
562 			if ((p = lookup(yytext + first)) != NULL) {
563 				first = yyleng;
564 				s = p->text;
565 
566 				/* if an extern declaration */
567 				if (s == externtext) {
568 					externdec = YES;
569 				} else if (s == typedeftext) {
570 					/* if a typedef name definition */
571 					typedefdef = YES;
572 					oldtype = YES;
573 				} else if (p->type == DECL && fcndef == YES &&
574 				    typedefdef == NO && parens == 0) {
575 					/* if a local definition */
576 					localdef = YES;
577 				} else if (templateparens == parens &&
578 				    template == YES) {
579 					/*
580 					 * keyword doesn't start a function
581 					 * template
582 					 */
583 					templateparens = -1;
584 					template = NO;
585 				} else {
586 					/*
587 					 * next identifier after typedef was
588 					 * a keyword
589 					 */
590 					oldtype = NO;
591 				}
592 				typedefname = NO;
593 			} else {	/* identifier */
594 				last = yyleng;
595 
596 				/*
597 				 * if an enum/struct/union keyword preceded
598 				 * this ident.
599 				 */
600 				if (esudef == YES && cesudeftoken) {
601 					token = cesudeftoken;
602 					cesudeftoken = '\0';
603 				} else {
604 					oldtype = NO;
605 				}
606 				/* if a local definition using a typedef name */
607 				if (typedefname == YES) {
608 					localdef = YES;
609 				}
610 				typedefname = NO;
611 				return (token);
612 			}
613 
614 		end:
615 			;
616 		}
617 \[		{	/* array dimension (don't worry about subscripts) */
618 			arraydimension = YES;
619 			goto more;
620 			/* NOTREACHED */
621 		}
622 \]		{
623 			arraydimension = NO;
624 			goto more;
625 			/* NOTREACHED */
626 		}
627 \\\n		{	/* preprocessor statement is continued on next line */
628 			goto eol;
629 			/* NOTREACHED */
630 		}
631 \n		{	/* end of the line */
632 			if (ppdefine == YES) {	/* end of a #define */
633 				ppdefine = NO;
634 				(void) yyless(yyleng - 1);	/* rescan \n */
635 				last = first;
636 				yymore();
637 				return (DEFINEEND);
638 			}
639 			/*
640 			 * skip the first 8 columns of a breakpoint listing
641 			 * line and skip the file path in the page header
642 			 */
643 			if (bplisting == YES) {
644 				int	c, i;
645 
646 				switch (input()) {
647 				/* tab and EOF just fall through */
648 				case ' ':	/* breakpoint number line */
649 				case '[':
650 					for (i = 1; i < 8 && input() != LEXEOF;
651 					    ++i) {
652 					    /*EMPTY*/
653 					}
654 					break;
655 				case '.':	/* header line */
656 				case '/':
657 					/* skip to the end of the line */
658 					while ((c = input()) != LEXEOF) {
659 						if (c == '\n') {
660 							unput(c);
661 							break;
662 						}
663 					}
664 					break;
665 				case '\n':	/* empty line */
666 					unput('\n');
667 					break;
668 				}
669 			}
670 		eol:
671 			++yylineno;
672 			first = 0;
673 			last = 0;
674 			if (symbols > 0) {
675 				return (NEWLINE);
676 			}
677 			lineno = yylineno;
678 		}
679 \'		{	/* character constant */
680 			if (sdl == NO) {
681 				multicharconstant('\'');
682 			}
683 			goto more;
684 			/* NOTREACHED */
685 		}
686 \"		{	/* string constant */
687 			multicharconstant('"');
688 			goto more;
689 			/* NOTREACHED */
690 		}
691 ^[ \t\f\b]+	{	/* don't save leading white space */
692 		}
693 \#[# \t]*include[ \t]*["<][^"> \t\n\r]+	{ /* #include or Ingres ##include */
694 			char	*s;
695 
696 			s = strpbrk(yytext, "\"<");
697 			incfile(s + 1, *s);
698 			first = s - yytext;
699 			last = yyleng;
700 			if (compress == YES) {
701 				/* compress the keyword */
702 				yytext[0] = '\1';
703 			}
704 			/*
705 			 * avoid multicharconstant call triggered by trailing
706 			 * ", which puts a trailing comment in the database
707 			 */
708 			if (*s == '"') {
709 				int	c;
710 
711 				while ((c = input()) != LEXEOF) {
712 					if (c == '"') {
713 						yytext[yyleng] = '"';
714 						yytext[++yyleng] = '\0';
715 						break;
716 					}
717 					/* the trailing '"' may be missing */
718 					if (c == '\n') {
719 						unput('\n');
720 						break;
721 					}
722 				}
723 			}
724 			yymore();
725 			return (INCLUDE);
726 			/* NOTREACHED */
727 		}
728 \#[ \t]*pragma[ \t]+weak[ \t]+{identifier} {
729 			ppdefine = YES;
730 			token = DEFINE;
731 			goto findident;
732 
733 			/*NOTREACHED*/
734 		}
735 \#[ \t]*{identifier}	|	/* preprocessor keyword */
736 {number}	|	/* number */
737 .		{	/* punctuation and operators */
738 		more:	first = yyleng;
739 			yymore();
740 		}
741 %%
742 
743 void
744 initscanner(char *srcfile)
745 {
746 	char	*s;
747 
748 	if (maxifbraces == NULL) {
749 		maxifbraces = mymalloc(miflevel * sizeof (int));
750 		preifbraces = mymalloc(miflevel * sizeof (int));
751 	}
752 	first = 0;		/* buffer index for first char of symbol */
753 	last = 0;		/* buffer index for last char of symbol */
754 	lineno = 1;		/* symbol line number */
755 	yylineno = 1;		/* input line number */
756 	arraydimension = NO;	/* inside array dimension declaration */
757 	bplisting = NO;		/* breakpoint listing */
758 	braces = 0;		/* unmatched left brace count */
759 	cesudeftoken = '\0';	/* class/enum/struct/union definition */
760 	classdef = NO;		/* c++ class definition */
761 	elseelif = NO;		/* #else or #elif found */
762 	esudef = NO;		/* enum/struct/union definition */
763 	esubraces = -1;		/* outermost enum/struct/union brace count */
764 	externdec = NO;		/* extern declaration */
765 	fcndef = NO;		/* function definition */
766 	globalscope = YES;	/* file global scope (outside functions) */
767 	iflevel = 0;		/* #if nesting level */
768 	initializer = NO;	/* data initializer */
769 	initializerbraces = -1;	/* data initializer outer brace count */
770 	lex = NO;		/* lex file */
771 	localdef = NO;		/* function/block local definition */
772 	parens = 0;		/* unmatched left parenthesis count */
773 	ppdefine = NO;		/* preprocessor define statement */
774 	psuedoelif = NO;	/* psuedo-#elif */
775 	oldtype = NO;		/* next identifier is an old type */
776 	rules = NO;		/* lex/yacc rules */
777 	sdl = NO;		/* SDL file */
778 	structfield = NO;	/* structure field declaration */
779 	template = NO;		/* function template */
780 	templateparens = -1;	/* function template outer parentheses count */
781 	typedefdef = NO;	/* typedef name definition */
782 	typedefname = NO;	/* typedef name use */
783 	asy = NO;		/* assembly file */
784 	BEGIN 0;
785 
786 	/* if this is not a C file */
787 	if ((s = strrchr(srcfile, '.')) != NULL) {
788 		switch (*++s) {	/* this switch saves time on C files */
789 		case 'b':
790 			if (strcmp(s, "bp") == 0) {	/* breakpoint listing */
791 				bplisting = YES;
792 			}
793 			break;
794 		case 'l':
795 			if (strcmp(s, "l") == 0) {	/* lex */
796 				lex = YES;
797 				globalscope = NO;
798 			}
799 			break;
800 		case 'p':
801 		case 's':
802 			if (strcmp(s, "pr") == 0 ||
803 			    strcmp(s, "sd") == 0) {	/* SDL */
804 				sdl = YES;
805 				BEGIN SDL;
806 			} else if (strcmp(s, "s") == 0) {
807 				asy = YES;
808 			}
809 			break;
810 		case 'y':
811 			if (strcmp(s, "y") == 0) {	/* yacc */
812 				globalscope = NO;
813 			}
814 			break;
815 		}
816 	}
817 }
818 
819 int
820 comment(void)
821 {
822 	int	c, lastc;
823 
824 	do {
825 		if ((c = getc(yyin)) == '*') {	/* C comment */
826 			lastc = '\0';
827 			while ((c = getc(yyin)) != EOF &&
828 			    (c != '/' || lastc != '*')) { /* fewer '/'s */
829 				if (c == '\n') {
830 					++yylineno;
831 				}
832 				lastc = c;
833 			}
834 			/* return a blank for Reiser cpp token concatenation */
835 			if ((c = getc(yyin)) == '_' || isalnum(c)) {
836 				(void) ungetc(c, yyin);
837 				c = ' ';
838 				break;
839 			}
840 		} else if (c == '/') {		/* C++ comment */
841 			while ((c = getc(yyin)) != EOF && c != '\n') {
842 				/*EMPTY*/
843 			}
844 			break;
845 		} else {	/* not a comment */
846 			(void) ungetc(c, yyin);
847 			c = '/';
848 			break;
849 		}
850 
851 		/* there may be an immediately following comment */
852 	} while (c == '/');
853 	return (c);
854 }
855 
856 void
857 multicharconstant(char terminator)
858 {
859 	char	c;
860 
861 	/* scan until the terminator is found */
862 	while ((c = yytext[yyleng++] = noncommentinput()) != terminator) {
863 		switch (c) {
864 		case '\\':	/* escape character */
865 			if ((yytext[yyleng++] = noncommentinput()) == '\n') {
866 				++yylineno;
867 			}
868 			break;
869 		case '\t':	/* tab character */
870 
871 			/* if not a lex program, continue */
872 			if (lex == NO) {
873 				break;
874 			}
875 			/* FALLTHROUGH */
876 
877 		case '\n':	/* illegal character */
878 
879 			/*
880 			 * assume the terminator is missing, so put
881 			 * this character back
882 			 */
883 			unput(c);
884 			yytext[--yyleng] = '\0';
885 			/* FALLTHROUGH */
886 
887 		case LEXEOF:	/* end of file */
888 			return;
889 
890 		default:
891 			/* change a control character to a blank */
892 			if (!isprint(c)) {
893 				yytext[yyleng - 1] = ' ';
894 			}
895 		}
896 		/* if this token will overflow the line buffer */
897 		/* note: '\\' may cause yyleng to be > STMTMAX */
898 		if (yyleng >= STMTMAX) {
899 
900 			/* truncate the token */
901 			while ((c = noncommentinput()) != LEXEOF) {
902 				if (c == terminator) {
903 					unput(c);
904 					break;
905 				} else if (c == '\n') {
906 					++yylineno;
907 				}
908 			}
909 		}
910 	}
911 	yytext[yyleng] = '\0';
912 }
913 
/*
 * Returns nonzero if str begins with ident and the character following the
 * match in str is neither alphanumeric nor an underscore; that is, ident
 * appears at the start of str as a complete identifier and not merely as
 * the prefix of a longer one.  The terminating NUL of str counts as a
 * non-identifier character.  Returns 0 otherwise.
 */
int
identcmp(const char *str, const char *ident)
{
	size_t	n = strlen(ident);

	if (strncmp(str, ident, n) != 0) {
		return (0);
	}
	/*
	 * <ctype.h> functions are only defined for unsigned char values and
	 * EOF; plain char may be negative, so convert before classifying.
	 */
	return (!isalnum((unsigned char)str[n]) && str[n] != '_');
}
926 
927 /*
928  * Here we want to
929  *   - Make *ENTRY*() macro invocations into function definitions
930  *   - Make SET_SIZE() macro calls into function ends
931  *   - Make "call sym" instructions into function calls
932  *   - Eliminate C function definitions (since they are for lint, and we want
933  *     only one definition for each function)
934  */
935 int
936 do_assembly(int token)
937 {
938 	/* Handle C keywords? */
939 
940 	switch (token) {
941 
942 	case FCNDEF:
943 		/*
944 		 * We have a symbol that looks like a C function definition or
945 		 * call.  (Note: That can include assembly instructions with
946 		 * the right parentheses.)  We want to convert assembly macro
947 		 * invocations to function calls, and ignore everything else.
948 		 * Since we technically can't tell the difference, we'll use
949 		 * an all-caps heuristic.
950 		 *
951 		 * ... except for SET_SIZE macros, since they will precede
952 		 * FUNCEND tokens, which will break code in find.c which
953 		 * assumes that FUNCEND tokens occur at the beginning of
954 		 * lines.
955 		 */
956 		if (isupper(yytext[first]) && strcmp(yytext, "SET_SIZE") != 0)
957 			return (FCNCALL);
958 
959 		/* Don't return a token. */
960 		return (-1);
961 
962 	case GLOBALDEF:
963 	case IDENT:
964 		/* Macro arguments come down as global variable definitions. */
965 
966 		if (identcmp(yytext, "ENTRY") ||
967 		    identcmp(yytext, "ENTRY2") ||
968 		    identcmp(yytext, "ENTRY_NP") ||
969 		    identcmp(yytext, "ENTRY_NP2") ||
970 		    identcmp(yytext, "RTENTRY") ||
971 		    identcmp(yytext, "ALTENTRY")) {
972 			/*
973 			 * Identifiers on lines beginning with *ENTRY* macros
974 			 * are actually function definitions.
975 			 */
976 			return (FCNDEF);
977 		}
978 
979 		if (identcmp(yytext, "SET_SIZE")) {
980 			/*
981 			 * Identifiers on lines beginning with SET_SIZE are
982 			 * actually function ends.
983 			 */
984 			return (FCNEND);
985 		}
986 
987 		if (first != 0 && identcmp(yytext, "call")) {
988 			/*
989 			 * Make this a function call.  We exclude first == 0,
990 			 * because that happens when we're looking at "call"
991 			 * itself.  (Then we'd get function calls to "call"
992 			 * everywhere.)
993 			 */
994 			return (FCNCALL);
995 		}
996 
997 		/* FALLTHROUGH */
998 
999 	default:
1000 		/* Default to normal behavior. */
1001 		return (token);
1002 	}
1003 }
1004