xref: /titanic_51/usr/src/tools/cscope-fast/scanner.l (revision ba2e4443695ee6a6f420a35cd4fc3d3346d22932)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  *
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  *	cscope - interactive C symbol cross-reference
34  *
35  *
36  *	C symbol scanner
37  */
38 #ident	"@(#)scanner.l	1.2	93/06/07 SMI"
39 #include "global.h"
40 
/* the line counting has been moved from character reading for speed */
/* comments are discarded */
/*
 * input() replaces the lex default.  It takes the most recently pushed-back
 * character from the yysbuf stack if there is one, otherwise it reads one
 * from yyin.  A '/' is handed to comment(), whose return value replaces
 * it.  EOF is mapped to 0 and anything else is passed through toascii().
 */
#undef	input
#define	input() \
	((yytchar = (yytchar = yysptr > yysbuf ? \
	    *--yysptr : getc(yyin)) == '/' ? comment() : yytchar) == \
	    EOF ? 0 : toascii(yytchar))
/*
 * noncommentinput() fetches a character the same way but does not call
 * comment() and does not apply toascii(); multicharconstant() uses it to
 * read the inside of string and character constants.
 */
#define	noncommentinput() \
	((yytchar = yysptr > yysbuf ? *--yysptr : getc(yyin)) == \
	    EOF ? 0 : yytchar)
/* unput() pushes a character onto the yysbuf stack; there is no bounds check */
#undef	unput
#define	unput(c) (*yysptr++ = (c))

/* not a preprocessor line (allow Ingres(TM) "## char var;" lines) */
#define	notpp()	(ppdefine == NO && (*yytext != '#' || yytext[1] == '#'))

#define	IFLEVELINC	5	/* #if nesting level size increment */
58 
59 /* keyword text for fast testing of keywords in the scanner */
60 extern	char	externtext[];
61 extern	char	typedeftext[];
62 
63 int	first;	/* buffer index for first char of symbol */
64 int	last;	/* buffer index for last char of symbol */
65 int	lineno;	/* symbol line number */
66 
67 static	BOOL	arraydimension;		/* inside array dimension declaration */
68 static	BOOL	bplisting;		/* breakpoint listing */
69 static	int	braces;			/* unmatched left brace count */
70 static	int	cesudeftoken;		/* class/enum/struct/union definition */
71 static	BOOL	classdef;		/* c++ class definition */
72 static	BOOL	elseelif;		/* #else or #elif found */
73 static	BOOL	esudef;			/* enum/struct/union definition */
74 static	int	esubraces;		/* outermost enum/struct/union */
75 					/* brace count */
76 static	BOOL	externdec;		/* extern declaration */
77 static	BOOL	fcndef;			/* function definition */
78 static	BOOL	globalscope;		/* file global scope */
79 					/* (outside functions) */
80 static	int	iflevel;		/* #if nesting level */
81 static	BOOL	initializer;		/* data initializer */
82 static	int	initializerbraces;	/* data initializer outer brace count */
83 static	BOOL	lex;			/* lex file */
84 static	BOOL	localdef;		/* function/block local definition */
85 static	int	miflevel = IFLEVELINC;	/* maximum #if nesting level */
86 static	int	*maxifbraces;		/* maximum brace count within #if */
87 static	int	*preifbraces;		/* brace count before #if */
88 static	int	parens;			/* unmatched left parenthesis count */
89 static	BOOL	ppdefine;		/* preprocessor define statement */
90 static	BOOL	psuedoelif;		/* psuedo-#elif */
91 static	BOOL	oldtype;		/* next identifier is an old type */
92 static	BOOL	rules;			/* lex/yacc rules */
93 static	BOOL	sdl;			/* SDL file */
94 static	BOOL	structfield;		/* structure field declaration */
95 static	BOOL	template;		/* function template */
96 static	int	templateparens;	/* function template outer parentheses count */
97 static	BOOL	typedefdef;	/* typedef name definition */
98 static	BOOL	typedefname;	/* typedef name use */
99 static	int	token;		/* token found */
100 
101 static	BOOL	asy;			/* assembly file */
102 
103 void multicharconstant(char terminator);
104 int do_assembly(int token);
105 %}
106 identifier	[a-zA-Z_][a-zA-Z_0-9]*
107 number		\.?[0-9][.0-9a-fA-FlLuUxX]*
108 %start SDL
109 %a 6000
110 %o 11000
111 %p 3000
112 %%
%\{		{	/* lex/yacc C declarations/definitions */
			/* text after this delimiter is scanned as global scope */
			globalscope = YES;
			goto more;
			/* NOTREACHED */
		}
%\}		{
			/* the closing delimiter turns global scope back off */
			globalscope = NO;
			goto more;
			/* NOTREACHED */
		}
^%%		{	/* lex/yacc rules delimiter */
			/*
			 * The first %% (rules == NO) is reported as FCNDEF with
			 * a synthesized yylex or yyparse name appended to
			 * yytext; the next %% (rules == YES) is reported as
			 * FCNEND.
			 */
			braces = 0;
			if (rules == NO) {
				rules = YES;

				/* simulate a yylex() or yyparse() definition */
				(void) strcat(yytext, " /* ");
				first = strlen(yytext);
				if (lex == YES) {
					(void) strcat(yytext, "yylex");
				} else {
					/*
					 * yacc: yyparse implicitly calls yylex
					 */
					char *s = " yylex()";
					char *cp = s + strlen(s);
					/*
					 * unput() is a stack, so push the text
					 * last character first; it is then
					 * re-read in forward order
					 */
					while (--cp >= s) {
						unput(*cp);
					}
					(void) strcat(yytext, "yyparse");
				}
				/* first..last now bracket the synthesized name */
				last = strlen(yytext);
				(void) strcat(yytext, " */");
				yyleng = strlen(yytext);
				yymore();
				return (FCNDEF);
			} else {
				rules = NO;
				globalscope = YES;
				/* no symbol text goes with the FCNEND token */
				last = first;
				yymore();
				return (FCNEND);
			}
			/* NOTREACHED */
		}
<SDL>(PROCEDURE|STATE)[ \t]+({identifier}|\*)	{ /* SDL procedure or state */
			/*
			 * a non-zero brace count keeps the identifier rule
			 * from classifying names in the body as parameters
			 * (it tests fcndef == YES && braces == 0)
			 */
			braces = 1;
			fcndef = YES;	/* treat as function definition */
			token = FCNDEF;
			globalscope = NO;
			/* findident locates the trailing name in yytext */
			goto findident;
			/* NOTREACHED */
		}
<SDL>(CALL|NEXTSTATE)[ \t]+({identifier}|\*)	{ /* SDL call or nextstate */
			token = FCNCALL;
			goto findident;	/* treat as function call */
			/* NOTREACHED */
		}
<SDL>END(PROCEDURE|STATE)[ \t]+({identifier}|\*)	{
			/* end of an SDL procedure or state */
			/* endstate is inside the right-brace rule */
			goto endstate;	/* treat as the end of a function */
			/* NOTREACHED */
		}
\{		{
			/* count unmatched left braces for fcn def detection */
			++braces;

			/*
			 * mark an untagged enum/struct/union so its beginning
			 * can be found
			 */
			/*
			 * cesudeftoken is set by the class and
			 * enum/struct/union rules and cleared by the
			 * identifier rule when a name follows the keyword,
			 * so it is still set here only if no name was seen
			 */
			if (cesudeftoken) {
				last = first;
				savesymbol(cesudeftoken);
				cesudeftoken = '\0';
			}
			goto more;
			/* NOTREACHED */
		}
\#[ \t]*endif/.*[\n\r][ \t\n\r]*#[ \t]*if	{
			/*
			 * attempt to correct erroneous brace count caused by:
			 *
			 * #if ...
			 * 	... {
			 * #endif
			 * #if ...
			 * 	... {
			 * #endif
			 */
			/*
			 * Only the #endif is matched; the following #if is
			 * trailing context.  Setting psuedoelif makes the #if
			 * rule treat that #if like an #elif of the current
			 * conditional instead of pushing a new level.
			 */
			/* the current #if must not have an #else or #elif */
			if (elseelif == YES) {
				goto endif;
			}
			psuedoelif = YES;
			goto more;
			/* NOTREACHED */
		}
\#[ \t]*ifn?(def)?	{ /* #if, #ifdef or #ifndef */
			elseelif = NO;
			/*
			 * an #if flagged by the preceding "#endif then #if"
			 * rule is handled as an #elif: no new level is pushed
			 */
			if (psuedoelif == YES) {
				psuedoelif = NO;
				goto elif;
			}
			/*
			 * make sure there is room for the current brace count
			 */
			if (iflevel == miflevel) {
				miflevel += IFLEVELINC;
				maxifbraces = myrealloc(maxifbraces,
				    miflevel * sizeof (int));
				preifbraces = myrealloc(preifbraces,
				    miflevel * sizeof (int));
			}
			/* push the current brace count */
			/*
			 * iflevel is post-incremented, so the slot of the
			 * innermost open #if is always iflevel - 1
			 */
			preifbraces[iflevel] = braces;
			maxifbraces[iflevel++] = 0;
			goto more;
			/* NOTREACHED */
		}
\#[ \t]*el(se|if)	{ /* #elif or #else */
			/*
			 * Each branch of an #if is scanned starting from the
			 * brace count that was in effect before the #if.  The
			 * largest count reached by any branch is kept in
			 * maxifbraces[] and picked up again at the #endif.
			 *
			 * The slot of the innermost open #if is iflevel - 1,
			 * because the #if rule post-increments iflevel when
			 * it pushes.  Both the comparison and the store must
			 * use that slot; slot iflevel belongs to a deeper
			 * level and may lie past the end of the array.
			 */
			elseelif = YES;
		elif:	/* also entered from the #if rule for a pseudo-#elif */
			if (iflevel > 0) {

				/* save the maximum brace count for this #if */
				if (braces > maxifbraces[iflevel - 1]) {
					maxifbraces[iflevel - 1] = braces;
				}
				/* restore the brace count to before the #if */
				braces = preifbraces[iflevel - 1];
			}
			goto more;
			/* NOTREACHED */
		}
\#[ \t]*endif	{	/* #endif */
		endif:	/* also entered from the "#endif then #if" rule */
			if (iflevel > 0) {

				/* get the maximum brace count for this #if */
				/* --iflevel pops the innermost #if level */
				if (braces < maxifbraces[--iflevel]) {
					braces = maxifbraces[iflevel];
				}
			}
			goto more;
			/* NOTREACHED */
		}
\}		{
			/* could be the last enum member initializer */
			if (braces == initializerbraces) {
				initializerbraces = -1;
				initializer = NO;
			}
			/* clamp the count at zero on unbalanced input */
			if (--braces <= 0) {
		endstate:	/* entered from the SDL END rule */
				braces = 0;
				classdef = NO;
			}
			/*
			 * if the end of an outermost enum/struct/union
			 * definition
			 */
			if (esudef == YES && braces == esubraces) {
				esudef = NO;
				esubraces = -1;
				/* no symbol text goes with this token */
				last = first;
				yymore();
				return (ESUEND);
			}
			/* if the end of a function */
			/*
			 * a brace count of 1 inside a class definition is
			 * the end of a member function body
			 */
			if ((braces == 0 || braces == 1 && classdef == YES) &&
			    fcndef == YES) {
				fcndef = NO;
				globalscope = YES;
				last = first;
				yymore();
				return (FCNEND);
			}
			goto more;
			/* NOTREACHED */
		}
\(		{
			/*
			 * count unmatched left parentheses for function
			 * templates
			 */
			++parens;
			goto more;
			/* NOTREACHED */
		}
\)		{
			/* clamp the count at zero on unbalanced input */
			if (--parens <= 0) {
				parens = 0;
			}
			/* if the end of a function template */
			/*
			 * templateparens holds the paren count saved by the
			 * function call rule when it set template
			 */
			if (parens == templateparens) {
				templateparens = -1;
				template = NO;
			}
			goto more;
			/* NOTREACHED */
		}
=		{	/* if a global definition initializer */
			/*
			 * remember the brace depth so the comma and right
			 * brace rules can tell where the initializer ends
			 */
			if ((globalscope == YES || localdef == YES) &&
			    notpp()) {
				initializerbraces = braces;
				initializer = YES;
			}
			goto more;
			/* NOTREACHED */
		}
:		{	/* if a structure field */
			/* note: a pr header has a colon in the date */
			/*
			 * while structfield is set the identifier rule does
			 * not report names as member definitions
			 */
			if (esudef == YES && notpp()) {
				structfield = YES;
			}
			goto more;
			/* NOTREACHED */
		}
\,		{
			/* a comma at the initializer's brace depth ends it */
			if (braces == initializerbraces) {
				initializerbraces = -1;
				initializer = NO;
			}
			structfield = NO;
			goto more;
			/* NOTREACHED */
		}
"##"		|	/* start of Ingres(TM) code line */
;		{
			/* end of statement: reset the per-declaration flags */
			/* if not in an enum/struct/union declaration */
			if (esudef == NO) {
				externdec = NO;
				typedefdef = NO;
				localdef = NO;
			}
			structfield = NO;
			initializer = NO;
			oldtype = NO;
			goto more;
			/* NOTREACHED */
		}
\#[ \t]*define[ \t]+{identifier}	{

			/* preprocessor macro or constant definition */
			/* ppdefine stays set until the newline rule clears it */
			ppdefine = YES;
			token = DEFINE;
			if (compress == YES) {
				/* compress the keyword */
				yytext[0] = '\7';
			}
		findident:
			/*
			 * Shared by rules whose match ends in the symbol:
			 * walk back from the last character of yytext over
			 * identifier characters and leave first at the start
			 * of the symbol.
			 */
			first = yyleng - 1;
			while (isalnum(yytext[first]) || yytext[first] == '_') {
				--first;
			}
			++first;
			goto iflongline;
			/* NOTREACHED */
		}
class[ \t]+{identifier}[ \t\n\ra-zA-Z0-9_():]*\{	{
			/* class definition */
			classdef = YES;
			cesudeftoken = 'c';
			/*
			 * REJECT so the same text is rescanned by the
			 * other rules; this rule only sets the flags
			 */
			REJECT;
			/* NOTREACHED */
		}
(enum|struct|union)/([ \t\n\r]+{identifier})?[ \t\n\r]*\{	{
			/* enum/struct/union definition */
			esudef = YES;
			if (esubraces < 0) {
				/* if outermost enum/struct/union */
				esubraces = braces;
			}
			/*
			 * the token is the character at yytext[first]; first
			 * is expected to index the start of the keyword here
			 */
			cesudeftoken = *(yytext + first);
			goto iflongline;
			/* NOTREACHED */
		}
{identifier}/[ \t]*\(([ \t\n\ra-zA-Z0-9_*&[\]=,.]*|\([ \ta-zA-Z0-9_*[\],]*\))*\)[ \t\n\r()]*[:a-zA-Z_#{]	{

			/*
			 * warning: "if (...)" must not overflow yytext, so
			 * the content of function argument definitions is
			 * restricted, in particular parentheses are
			 * not allowed
			 */

			if (asy) {
				/*
				 * In assembly files, if it looks like
				 * a definition, pass it down as one and we'll
				 * take care of it later.
				 */
				token = FCNDEF;
				goto iflongline;
			}

			/* if a function definition */
			/*
			 * note: "#define a (b) {" and "#if defined(a)\n#"
			 * are not
			 */
			/*
			 * accepted at brace depth 0 outside preprocessor
			 * lines and lex/yacc rules, or at depth 1 inside a
			 * class definition
			 */
			if (braces == 0 && notpp() && rules == NO ||
			    braces == 1 && classdef == YES) {
				fcndef = YES;
				token = FCNDEF;
				globalscope = NO;
				goto iflongline;
			}
			/* otherwise classify it as a call or identifier */
			goto iffcncall;
			/* NOTREACHED */
		}
{identifier}/[ \t]*\(	{
			if (asy) {
				/*
				 * Macro calls can get here if they have
				 * arguments which contain %'s (i.e.,
				 * registers).
				 */
				token = FCNDEF;
				goto iflongline;
			}

			/* if a function call */
		iffcncall:	/* also entered from the function definition rule */
			/*
			 * a call is only reported inside a function body, a
			 * #define or lex/yacc rules, and not within an extern
			 * declaration or an uninitialized local definition
			 */
			if ((fcndef == YES || ppdefine == YES ||
			    rules == YES) && externdec == NO &&
			    (localdef == NO || initializer == YES)) {
				token = FCNCALL;
				goto iflongline;
			}
			/*
			 * record the paren depth so the right parenthesis
			 * rule can clear the template flag again
			 */
			if (template == NO && typedefdef == NO) {
				templateparens = parens;
				template = YES;
			}
			token = IDENT;
			goto iflongline;
			/* NOTREACHED */
		}
(\+\+|--)[ \t]*{identifier}	{	/* prefix increment or decrement */
			token = ASSIGNMENT;
			/* the match starts with the operator; find the name */
			goto findident;
			/* NOTREACHED */
		}
{identifier}/[ \t]*(\+\+|--)	{	/* postfix increment or decrement */
			/* the operator is trailing context, not in the match */
			token = ASSIGNMENT;
			goto iflongline;
			/* NOTREACHED */
		}
\*[ \t]*{identifier}/[ \t]*[^a-zA-Z0-9_(+-][^+-]	{
			/* indirect assignment or dcl */
			/* advance first past the '*' and blanks to the name */
			while (!isalnum(yytext[first]) &&
			    yytext[first] != '_') {
				++first;
			}
			goto ident;
			/* NOTREACHED */
		}
{identifier}/[ \t\n\r]*(=[^=]|[-+*/%&^|]=|<<=|>>=)	{ /* assignment */
			/*
			 * reported as an assignment only inside a function
			 * body, a #define or lex/yacc rules, and not while a
			 * local definition is in progress; otherwise the
			 * name goes through the normal identifier tests
			 */
			if ((fcndef == YES || ppdefine == YES ||
			    rules == YES) && localdef == NO) {
				token = ASSIGNMENT;
				goto iflongline;
			}
			goto ident;
			/* NOTREACHED */
		}
{identifier}/[* \t\n\r]+[a-zA-Z0-9_]	{	/* possible typedef name use */
			if (notpp() && esudef == NO && fcndef == YES &&
			    typedefdef == NO && parens == 0) {
				char	c, *s = yytext + first - 1;

				/*
				 * scan backwards through yytext to the
				 * previous ';' or '{' (or the start of the
				 * buffer); anything other than letters and
				 * white space in between rules out a typedef
				 * name
				 */
				while (--s >= yytext && (c = *s) != ';' &&
				    c != '{') {
					if (!isspace(c) && !isalpha(c)) {
						goto nottypedefname;
					}
				}
				/* consumed by the identifier rule */
				typedefname = YES;
			}
		nottypedefname:
			/* skip the global/parameter/local tests */
			token = IDENT;
			goto iflongline;
			/* NOTREACHED */
		}
{identifier}	{
			struct	keystruct *p;
			char	*s;

			/*
			 * Entry points into this rule:
			 *   ident      - classify the symbol from the scanner
			 *		  state, then fall into iflongline
			 *   iflongline - token is already set; enforce the
			 *		  length limits, then separate
			 *		  keywords from real symbols
			 */
		ident:	token = IDENT;
			if (notpp() && externdec == NO &&
			    arraydimension == NO && initializer == NO) {

				/* if an enum/struct/union member definition */
				if (esudef == YES) {
					if (structfield == NO) {
						token = MEMBERDEF;
					}
				} else if (typedefdef == YES && oldtype == NO) {
					/* if a typedef name */
					token = TYPEDEF;
				} else if (globalscope == YES &&
				    template == NO && oldtype == NO) {
					/* if a global definition */
					token = GLOBALDEF;
				} else if (fcndef == YES && braces == 0) {
					/* if a function parameter definition */
					token = PARAMETER;
				} else if (localdef == YES) {
					/* if a local definition */
					token = LOCALDEF;
				}
			}
		iflongline:
			/* if a long line */
			if (yyleng > STMTMAX) {
				int	c;

				/* skip to the end of the line */
				warning("line too long");
				while ((c = input()) != LEXEOF) {
					if (c == '\n') {
						/* leave it for the newline rule */
						unput(c);
						break;
					}
				}
			}
			/* truncate a long symbol */
			if (yyleng - first > PATLEN) {
				warning("symbol too long");
				yyleng = first + PATLEN;
				yytext[yyleng] = '\0';
			}

			yymore();

			if (asy) {
				int t;

				/*
				 * assembly files: do_assembly() remaps the
				 * token; a negative result means no token is
				 * returned for this symbol
				 */
				last = yyleng;
				t = do_assembly(token);
				if (t >= 0) {
					token = t;
					return (token);
				}

				goto end;
			}

			/* if a keyword */
			/*
			 * keywords only update the scanner state; no token
			 * is returned for them
			 */
			if ((p = lookup(yytext + first)) != NULL) {
				first = yyleng;
				s = p->text;

				/* if an extern declaration */
				if (s == externtext) {
					externdec = YES;
				} else if (s == typedeftext) {
					/* if a typedef name definition */
					typedefdef = YES;
					oldtype = YES;
				} else if (p->type == DECL && fcndef == YES &&
				    typedefdef == NO && parens == 0) {
					/* if a local definition */
					localdef = YES;
				} else if (templateparens == parens &&
				    template == YES) {
					/*
					 * keyword doesn't start a function
					 * template
					 */
					templateparens = -1;
					template = NO;
				} else {
					/*
					 * next identifier after typedef was
					 * a keyword
					 */
					oldtype = NO;
				}
				typedefname = NO;
			} else {	/* identifier */
				/* first..last bracket the symbol in yytext */
				last = yyleng;

				/*
				 * if an enum/struct/union keyword preceded
				 * this ident.
				 */
				if (esudef == YES && cesudeftoken) {
					token = cesudeftoken;
					cesudeftoken = '\0';
				} else {
					oldtype = NO;
				}
				/* if a local definition using a typedef name */
				if (typedefname == YES) {
					localdef = YES;
				}
				typedefname = NO;
				return (token);
			}

		end:
			;
		}
\[		{	/* array dimension (don't worry about subscripts) */
			/*
			 * while this is set the identifier rule leaves names
			 * classified as plain IDENT
			 */
			arraydimension = YES;
			goto more;
			/* NOTREACHED */
		}
\]		{
			arraydimension = NO;
			goto more;
			/* NOTREACHED */
		}
\\\n		{	/* preprocessor statement is continued on next line */
			/*
			 * count the line at eol but skip the end-of-#define
			 * handling in the newline rule
			 */
			goto eol;
			/* NOTREACHED */
		}
\n		{	/* end of the line */
			if (ppdefine == YES) {	/* end of a #define */
				ppdefine = NO;
				/*
				 * give the newline back so it is matched
				 * again after DEFINEEND has been returned
				 */
				(void) yyless(yyleng - 1);	/* rescan \n */
				last = first;
				yymore();
				return (DEFINEEND);
			}
			/*
			 * skip the first 8 columns of a breakpoint listing
			 * line and skip the file path in the page header
			 */
			if (bplisting == YES) {
				int	c, i;

				/* the first character of the next line decides */
				switch (input()) {
				/* tab and EOF just fall through */
				case ' ':	/* breakpoint number line */
				case '[':
					/* one column was read by the switch */
					for (i = 1; i < 8 && input() != LEXEOF;
					    ++i) {
					    /*EMPTY*/
					}
					break;
				case '.':	/* header line */
				case '/':
					/* skip to the end of the line */
					while ((c = input()) != LEXEOF) {
						if (c == '\n') {
							unput(c);
							break;
						}
					}
					break;
				case '\n':	/* empty line */
					unput('\n');
					break;
				}
			}
		eol:	/* also entered from the line continuation rule */
			++yylineno;
			first = 0;
			last = 0;
			/*
			 * NEWLINE is returned only when symbols is positive;
			 * otherwise scanning continues and lineno moves on
			 * to the new line
			 */
			if (symbols > 0) {
				return (NEWLINE);
			}
			lineno = yylineno;
		}
\'		{	/* character constant */
			/* in SDL files a quote is kept as plain punctuation */
			if (sdl == NO) {
				multicharconstant('\'');
			}
			goto more;
			/* NOTREACHED */
		}
\"		{	/* string constant */
			/* copy the rest of the constant into yytext */
			multicharconstant('"');
			goto more;
			/* NOTREACHED */
		}
^[ \t\f\b]+	{	/* don't save leading white space */
			/* no yymore(), so the matched text is dropped */
		}
\#[# \t]*include[ \t]*["<][^"> \t\n\r]+	{ /* #include or Ingres ##include */
			char	*s;

			/*
			 * s points at the opening delimiter; the file name
			 * and the delimiter character are passed to incfile()
			 */
			s = strpbrk(yytext, "\"<");
			incfile(s + 1, *s);
			/* the symbol runs from the delimiter to the end */
			first = s - yytext;
			last = yyleng;
			if (compress == YES) {
				/* compress the keyword */
				yytext[0] = '\1';
			}
			/*
			 * avoid multicharconstant call triggered by trailing
			 * ", which puts a trailing comment in the database
			 */
			if (*s == '"') {
				int	c;

				/*
				 * read up to the closing quote and append it
				 * to yytext; characters in between are
				 * discarded
				 */
				while ((c = input()) != LEXEOF) {
					if (c == '"') {
						yytext[yyleng] = '"';
						yytext[++yyleng] = '\0';
						break;
					}
					/* the trailing '"' may be missing */
					if (c == '\n') {
						unput('\n');
						break;
					}
				}
			}
			yymore();
			return (INCLUDE);
			/* NOTREACHED */
		}
\#[ \t]*pragma[ \t]+weak[ \t]+{identifier} {
			/*
			 * handled the same way as a #define of the named
			 * symbol (minus keyword compression): the newline
			 * rule will return DEFINEEND for it
			 */
			ppdefine = YES;
			token = DEFINE;
			goto findident;

			/*NOTREACHED*/
		}
\#[ \t]*{identifier}	|	/* preprocessor keyword */
{number}	|	/* number */
.		{	/* punctuation and operators */
			/*
			 * Common exit for text that is kept but produces no
			 * token: the next symbol will start at the current
			 * end of yytext, and yymore() keeps the text
			 * accumulated so far.
			 */
		more:	first = yyleng;
			yymore();
		}
743 %%
744 
745 void
746 initscanner(char *srcfile)
747 {
748 	char	*s;
749 
750 	if (maxifbraces == NULL) {
751 		maxifbraces = mymalloc(miflevel * sizeof (int));
752 		preifbraces = mymalloc(miflevel * sizeof (int));
753 	}
754 	first = 0;		/* buffer index for first char of symbol */
755 	last = 0;		/* buffer index for last char of symbol */
756 	lineno = 1;		/* symbol line number */
757 	yylineno = 1;		/* input line number */
758 	arraydimension = NO;	/* inside array dimension declaration */
759 	bplisting = NO;		/* breakpoint listing */
760 	braces = 0;		/* unmatched left brace count */
761 	cesudeftoken = '\0';	/* class/enum/struct/union definition */
762 	classdef = NO;		/* c++ class definition */
763 	elseelif = NO;		/* #else or #elif found */
764 	esudef = NO;		/* enum/struct/union definition */
765 	esubraces = -1;		/* outermost enum/struct/union brace count */
766 	externdec = NO;		/* extern declaration */
767 	fcndef = NO;		/* function definition */
768 	globalscope = YES;	/* file global scope (outside functions) */
769 	iflevel = 0;		/* #if nesting level */
770 	initializer = NO;	/* data initializer */
771 	initializerbraces = -1;	/* data initializer outer brace count */
772 	lex = NO;		/* lex file */
773 	localdef = NO;		/* function/block local definition */
774 	parens = 0;		/* unmatched left parenthesis count */
775 	ppdefine = NO;		/* preprocessor define statement */
776 	psuedoelif = NO;	/* psuedo-#elif */
777 	oldtype = NO;		/* next identifier is an old type */
778 	rules = NO;		/* lex/yacc rules */
779 	sdl = NO;		/* SDL file */
780 	structfield = NO;	/* structure field declaration */
781 	template = NO;		/* function template */
782 	templateparens = -1;	/* function template outer parentheses count */
783 	typedefdef = NO;	/* typedef name definition */
784 	typedefname = NO;	/* typedef name use */
785 	asy = NO;		/* assembly file */
786 	BEGIN 0;
787 
788 	/* if this is not a C file */
789 	if ((s = strrchr(srcfile, '.')) != NULL) {
790 		switch (*++s) {	/* this switch saves time on C files */
791 		case 'b':
792 			if (strcmp(s, "bp") == 0) {	/* breakpoint listing */
793 				bplisting = YES;
794 			}
795 			break;
796 		case 'l':
797 			if (strcmp(s, "l") == 0) {	/* lex */
798 				lex = YES;
799 				globalscope = NO;
800 			}
801 			break;
802 		case 'p':
803 		case 's':
804 			if (strcmp(s, "pr") == 0 ||
805 			    strcmp(s, "sd") == 0) {	/* SDL */
806 				sdl = YES;
807 				BEGIN SDL;
808 			} else if (strcmp(s, "s") == 0) {
809 				asy = YES;
810 			}
811 			break;
812 		case 'y':
813 			if (strcmp(s, "y") == 0) {	/* yacc */
814 				globalscope = NO;
815 			}
816 			break;
817 		}
818 	}
819 }
820 
821 int
822 comment(void)
823 {
824 	int	c, lastc;
825 
826 	do {
827 		if ((c = getc(yyin)) == '*') {	/* C comment */
828 			lastc = '\0';
829 			while ((c = getc(yyin)) != EOF &&
830 			    (c != '/' || lastc != '*')) { /* fewer '/'s */
831 				if (c == '\n') {
832 					++yylineno;
833 				}
834 				lastc = c;
835 			}
836 			/* return a blank for Reiser cpp token concatenation */
837 			if ((c = getc(yyin)) == '_' || isalnum(c)) {
838 				(void) ungetc(c, yyin);
839 				c = ' ';
840 				break;
841 			}
842 		} else if (c == '/') {		/* C++ comment */
843 			while ((c = getc(yyin)) != EOF && c != '\n') {
844 				/*EMPTY*/
845 			}
846 			break;
847 		} else {	/* not a comment */
848 			(void) ungetc(c, yyin);
849 			c = '/';
850 			break;
851 		}
852 
853 		/* there may be an immediately following comment */
854 	} while (c == '/');
855 	return (c);
856 }
857 
858 void
859 multicharconstant(char terminator)
860 {
861 	char	c;
862 
863 	/* scan until the terminator is found */
864 	while ((c = yytext[yyleng++] = noncommentinput()) != terminator) {
865 		switch (c) {
866 		case '\\':	/* escape character */
867 			if ((yytext[yyleng++] = noncommentinput()) == '\n') {
868 				++yylineno;
869 			}
870 			break;
871 		case '\t':	/* tab character */
872 
873 			/* if not a lex program, continue */
874 			if (lex == NO) {
875 				break;
876 			}
877 			/* FALLTHROUGH */
878 
879 		case '\n':	/* illegal character */
880 
881 			/*
882 			 * assume the terminator is missing, so put
883 			 * this character back
884 			 */
885 			unput(c);
886 			yytext[--yyleng] = '\0';
887 			/* FALLTHROUGH */
888 
889 		case LEXEOF:	/* end of file */
890 			return;
891 
892 		default:
893 			/* change a control character to a blank */
894 			if (!isprint(c)) {
895 				yytext[yyleng - 1] = ' ';
896 			}
897 		}
898 		/* if this token will overflow the line buffer */
899 		/* note: '\\' may cause yyleng to be > STMTMAX */
900 		if (yyleng >= STMTMAX) {
901 
902 			/* truncate the token */
903 			while ((c = noncommentinput()) != LEXEOF) {
904 				if (c == terminator) {
905 					unput(c);
906 					break;
907 				} else if (c == '\n') {
908 					++yylineno;
909 				}
910 			}
911 		}
912 	}
913 	yytext[yyleng] = '\0';
914 }
915 
916 /*
917  * Returns true if the beginning of str matches ident, and the next character
918  * is not alphanumeric and not an underscore.
919  */
/*
 * identcmp - test whether str begins with the complete identifier ident.
 *
 * str	 text to examine (NUL-terminated)
 * ident identifier to look for (NUL-terminated)
 *
 * Returns non-zero when the first strlen(ident) characters of str equal
 * ident and the character that follows is neither alphanumeric nor an
 * underscore, so "ENTRY(" matches "ENTRY" but "ENTRY2(" and "ENTRY_NP("
 * do not.  Returns 0 otherwise.
 */
int
identcmp(const char *str, const char *ident)
{
	size_t n = strlen(ident);

	if (strncmp(str, ident, n) != 0) {
		return (0);
	}
	/*
	 * After a successful strncmp, str[n] is within the string (it is at
	 * worst the terminating NUL).  The ctype functions need an unsigned
	 * char value, so cast in case char is signed.
	 */
	return (!isalnum((unsigned char)str[n]) && str[n] != '_');
}
928 
929 /*
930  * Here we want to
931  *   - Make *ENTRY*() macro invocations into function definitions
932  *   - Make SET_SIZE() macro calls into function ends
933  *   - Make "call sym" instructions into function calls
934  *   - Eliminate C function definitions (since they are for lint, and we want
935  *     only one definition for each function)
936  */
937 int
938 do_assembly(int token)
939 {
940 	/* Handle C keywords? */
941 
942 	switch (token) {
943 
944 	case FCNDEF:
945 		/*
946 		 * We have a symbol that looks like a C function definition or
947 		 * call.  (Note: That can include assembly instructions with
948 		 * the right parentheses.)  We want to convert assembly macro
949 		 * invocations to function calls, and ignore everything else.
950 		 * Since we technically can't tell the difference, we'll use
951 		 * an all-caps heuristic.
952 		 *
953 		 * ... except for SET_SIZE macros, since they will precede
954 		 * FUNCEND tokens, which will break code in find.c which
955 		 * assumes that FUNCEND tokens occur at the beginning of
956 		 * lines.
957 		 */
958 		if (isupper(yytext[first]) && strcmp(yytext, "SET_SIZE") != 0)
959 			return (FCNCALL);
960 
961 		/* Don't return a token. */
962 		return (-1);
963 
964 	case GLOBALDEF:
965 	case IDENT:
966 		/* Macro arguments come down as global variable definitions. */
967 
968 		if (identcmp(yytext, "ENTRY") ||
969 		    identcmp(yytext, "ENTRY2") ||
970 		    identcmp(yytext, "ENTRY_NP") ||
971 		    identcmp(yytext, "ENTRY_NP2") ||
972 		    identcmp(yytext, "RTENTRY") ||
973 		    identcmp(yytext, "ALTENTRY")) {
974 			/*
975 			 * Identifiers on lines beginning with *ENTRY* macros
976 			 * are actually function definitions.
977 			 */
978 			return (FCNDEF);
979 		}
980 
981 		if (identcmp(yytext, "SET_SIZE")) {
982 			/*
983 			 * Identifiers on lines beginning with SET_SIZE are
984 			 * actually function ends.
985 			 */
986 			return (FCNEND);
987 		}
988 
989 		if (first != 0 && identcmp(yytext, "call")) {
990 			/*
991 			 * Make this a function call.  We exclude first == 0,
992 			 * because that happens when we're looking at "call"
993 			 * itself.  (Then we'd get function calls to "call"
994 			 * everywhere.)
995 			 */
996 			return (FCNCALL);
997 		}
998 
999 	default:
1000 		/* Default to normal behavior. */
1001 		return (token);
1002 	}
1003 }
1004