xref: /titanic_44/usr/src/cmd/vi/misc/ctags.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*	Copyright (c) 1988 AT&T	*/
6 /*	All Rights Reserved	*/
7 
8 /*
9  * Copyright (c) 1980 Regents of the University of California.
10  * All rights reserved.  The Berkeley software License Agreement
11  * specifies the terms and conditions for redistribution.
12  */
13 
14 /*
15  *   Modify ctags to handle C++ in C_entries(), etc:
16  *	-  Handles C++ comment token "//"
17  *	-  Handles C++ scope operator "::".
18  *		This helps to distinguish between xyz()
19  *	   definition and X::xyz() definition.
20  *	-  Recognizes C++ reserved word "class" in typedef processing
21  *		(for "-t" option)
22  *	-  Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx.
23  *	-  Handles overloaded unary/binary operator names
24  *   Doesn't handle yet:
25  *	-  inline functions in class definition (currently they get
26  *		swallowed within a class definition)
27  *	-  Tags with scope operator :: with spaces in between,
28  *		e.g. classz ::afunc
29  *
30  *   Enhance operator functions support:
31  *  	-  Control flow involving operator tokens scanning are
32  *	   consistent with that of other function tokens - original
33  *	   hacking method for 2.0 is removed.  This will accurately
34  *	   identify tags for declarations of the form 'operator+()'
35  *	   (bugid 1027806) as well as allowing spaces in between
36  *	   'operator' and 'oprtk', e.g. 'operator + ()'.
37  *
38  */
39 
40 #ifndef lint
/* Berkeley copyright banner; the surrounding #ifndef drops it under lint. */
char copyright[] =
	"@(#) Copyright (c) 1980 Regents of the University of California.\n"
	"All rights reserved.\n";
43 #endif
44 
45 #include <stdio.h>
46 #include <ctype.h>
47 #include <locale.h>
48 #include <unistd.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <limits.h>
52 #include <sys/types.h>
53 #include <sys/stat.h>
54 
55 /*
56  * ctags: create a tags file
57  */
58 
#define	bool	char

#define	TRUE	(1)
#define	FALSE	(0)

#define	CPFLAG	3			/* # of bytes in a flag		*/

/*
 * Size of every character-class table: one slot per unsigned char value.
 * The tables used to hold only 0177 entries, so DEL, any 8-bit byte and
 * EOF all indexed outside the array.
 */
#define	NCHARS	(UCHAR_MAX + 1)

/*
 * Character-class lookups.  The argument is narrowed to unsigned char
 * before indexing, so a plain (possibly negative) char, an int returned
 * by getc(), or EOF always selects a valid slot.  Slots that init() does
 * not fill keep their static zero value (FALSE).
 */
#define	iswhite(arg)	(_wht[(unsigned char)(arg)])
					/* T if char is white		*/
#define	begtoken(arg)	(_btk[(unsigned char)(arg)])
					/* T if char can start token	*/
#define	intoken(arg)	(_itk[(unsigned char)(arg)])
					/* T if char can be in token	*/
#define	endtoken(arg)	(_etk[(unsigned char)(arg)])
					/* T if char ends tokens	*/
#define	isgood(arg)	(_gd[(unsigned char)(arg)])
					/* T if char can be after ')'	*/

#define	optoken(arg)	(_opr[(unsigned char)(arg)])
					/* T if char can be		*/
					/* an overloaded operator token	*/

/* arguments and result are parenthesized so any expression is safe */
#define	max(I1, I2)	((I1) > (I2) ? (I1) : (I2))

struct	nd_st {			/* sorting structure			*/
	char	*entry;			/* function or type name	*/
	char	*file;			/* file name			*/
	bool	f;			/* use pattern or line no	*/
	int	lno;			/* for -x option		*/
	char	*pat;			/* search pattern		*/
	bool	been_warned;		/* set if noticed dup		*/
	struct	nd_st	*left, *right;	/* left and right sons		*/
};

long	ftell();
typedef	struct	nd_st	NODE;

static bool
	number,				/* T if on line starting with #	*/
	gotone,				/* found a func already on line	*/
					/* boolean "func" (see init)	*/
	_wht[NCHARS], _etk[NCHARS], _itk[NCHARS], _btk[NCHARS], _gd[NCHARS];

/* boolean array for overloadable operator symbols			*/
static bool	_opr[NCHARS];
98 
99 	/*
100 	 * typedefs are recognized using a simple finite automata,
101 	 * tydef is its state variable.
102 	 */
103 typedef enum {none, begin, begin_rec, begin_tag, middle, end } TYST;
104 
105 static TYST tydef = none;
106 
107 static char	searchar = '/';		/* use /.../ searches 		*/
108 
109 static int	lineno;			/* line number of current line */
110 static char
111 	line[4*BUFSIZ],		/* current input line			*/
112 	*curfile,		/* current input file name		*/
113 	*outfile = "tags",	/* output file				*/
114 	*white	= " \f\t\n",	/* white chars				*/
115 	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
116 				/* token ending chars			*/
117 	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
118 				/* token starting chars			*/
119 	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
120 		    "0123456789",
121 				/* valid in-token chars			*/
122 	*notgd	= ",;";		/* non-valid after-function chars	*/
123 
124 static char	*oprtk	= " =-+%*/&|^~!<>[]()";	/* overloadable operators */
125 
126 static int	file_num;	/* current file number			*/
127 static int	aflag;		/* -a: append to tags */
128 
129 #ifndef XPG4			/* XPG4: handle typedefs by default	*/
130 static int	tflag;		/* -t: create tags for typedefs		*/
131 #endif /*  !XPG4 */
132 
133 static int	uflag;		/* -u: update tags			*/
134 static int	wflag;		/* -w: suppress warnings		*/
135 static int	vflag;		/* -v: create vgrind style index output */
136 static int	xflag;		/* -x: create cxref style output	*/
137 
138 static char	lbuf[LINE_MAX];
139 
140 static FILE
141 	*inf,			/* ioptr for current input file		*/
142 	*outf;			/* ioptr for tags file			*/
143 
144 static long	lineftell;	/* ftell after getc( inf ) == '\n' 	*/
145 
146 static NODE	*head;		/* the head of the sorted binary tree	*/
147 
148 #ifdef __STDC__
149 char	*strrchr(), *strchr();
150 #else
151 char	*rindex(), *index();
152 #endif
153 
154 static int	infile_fail;	/* Count of bad opens. Fix bug ID #1082298 */
155 
156 static char	*dbp = lbuf;
157 static int	pfcnt;
158 
159 static int	mac;		/* our modified argc, after parseargs() */
160 static char	**mav;		/* our modified argv, after parseargs() */
161 
162 
163 /* our local functions:							*/
164 static void	init();
165 static void	find_entries(char *file);
166 static void	pfnote();
167 static void	C_entries();
168 static int	start_entry(char **lp, char *token, int *f);
169 static void	Y_entries();
170 static char	*toss_comment(char *start);
171 static void	getaline(long int where);
172 static void	free_tree(NODE *node);
173 static void	add_node(NODE *node, NODE *cur_node);
174 static void	put_entries(NODE *node);
175 static int	PF_funcs(FILE *fi);
176 static int	tail(char *cp);
177 static void	takeprec();
178 static void	getit();
179 static char	*savestr(char *cp);
180 static void	L_funcs(FILE *fi);
181 static void	L_getit(int special);
182 static int	striccmp(char *str, char *pat);
183 static int	first_char();
184 static void	toss_yysec();
185 static void	Usage();
186 static void	parseargs(int ac, char **av);
187 
188 int
main(int ac,char * av[])189 main(int ac, char *av[])
190 {
191 	int i;
192 	char cmd[100];
193 
194 	(void) setlocale(LC_ALL, "");
195 #if !defined(TEXT_DOMAIN)
196 #define	TEXT_DOMAIN "SYS_TEST"
197 #endif
198 	(void) textdomain(TEXT_DOMAIN);
199 
200 	parseargs(ac, av);
201 
202 	while ((i = getopt(mac, mav, "aBFtuvwxf:")) != EOF) {
203 		switch (i) {
204 		case 'a':	/* -a: Append output to existing tags file */
205 			aflag++;
206 			break;
207 
208 		case 'B':	/* -B: Use backward search patterns (?...?) */
209 			searchar = '?';
210 			break;
211 
212 		case 'F':	/* -F: Use forward search patterns (/.../) */
213 			searchar = '/';
214 			break;
215 
216 		case 't':	/* -t: Create tags for typedefs.	*/
217 				/* for XPG4 , we silently ignore "-t".	*/
218 #ifndef XPG4
219 			tflag++;
220 #endif /*  !XPG4 */
221 			break;
222 
223 		case 'u':	/* -u: Update the specified tags file	*/
224 			uflag++;
225 			break;
226 
227 		case 'v':	/* -v: Index listing on stdout		*/
228 			vflag++;
229 			xflag++;
230 			break;
231 
232 		case 'w':	/* -w: Suppress warnings		*/
233 			wflag++;
234 			break;
235 
236 		case 'x':	/* -x: Produce a simple index		*/
237 			xflag++;
238 			break;
239 
240 		case 'f':	/* -f tagsfile: output to tagsfile	*/
241 			outfile = strdup(optarg);
242 			break;
243 
244 		default:
245 			Usage();	/* never returns		*/
246 			break;
247 		}
248 	}
249 
250 	/* if we didn't specify any source code to parse, complain and die. */
251 	if (optind == mac) {
252 		Usage();	/* never returns		*/
253 	}
254 
255 
256 	init();			/* set up boolean "functions"		*/
257 	/*
258 	 * loop through files finding functions
259 	 */
260 	for (file_num = optind; file_num < mac; file_num++)
261 		find_entries(mav[file_num]);
262 
263 	if (xflag) {
264 		put_entries(head);
265 		exit(infile_fail > 0 ? 2 : 0); /* Fix for 1082298 */
266 	}
267 	if (uflag) {
268 		for (i = 1; i < mac; i++) {
269 			(void) sprintf(cmd,
270 			"mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
271 				outfile, mav[i], outfile);
272 			(void) system(cmd);
273 		}
274 		aflag++;
275 	}
276 	outf = fopen(outfile, aflag ? "a" : "w");
277 	if (outf == NULL) {
278 		perror(outfile);
279 		exit(1);
280 	}
281 	put_entries(head);
282 	(void) fclose(outf);
283 	if (uflag) {
284 		(void) sprintf(cmd, "sort %s -o %s", outfile, outfile);
285 		(void) system(cmd);
286 	}
287 	return (infile_fail > 0 ? 2 : 0); /* Fix for #1082298 */
288 }
289 
290 /*
291  * This routine sets up the boolean psuedo-functions which work
292  * by seting boolean flags dependent upon the corresponding character
293  * Every char which is NOT in that string is not a white char.  Therefore,
294  * all of the array "_wht" is set to FALSE, and then the elements
295  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
296  * of a char is TRUE if it is the string "white", else FALSE.
297  */
298 static void
init()299 init()
300 {
301 	char	*sp;
302 	int	i;
303 
304 	for (i = 0; i < 0177; i++) {
305 		_wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
306 		_opr[i] = FALSE;	/* initialize boolean		*/
307 					/* array of operator symbols	*/
308 		_gd[i] = TRUE;
309 	}
310 	for (sp = white; *sp; sp++)
311 		_wht[*sp] = TRUE;
312 	for (sp = endtk; *sp; sp++)
313 		_etk[*sp] = TRUE;
314 	for (sp = intk; *sp; sp++)
315 		_itk[*sp] = TRUE;
316 	for (sp = begtk; *sp; sp++)
317 		_btk[*sp] = TRUE;
318 
319 	/* mark overloadable operator symbols				*/
320 	for (sp = oprtk; *sp; sp++)
321 		_opr[*sp] = TRUE;
322 
323 	for (sp = notgd; *sp; sp++)
324 		_gd[*sp] = FALSE;
325 }
326 
327 /*
328  * This routine opens the specified file and calls the function
329  * which finds the function and type definitions.
330  */
331 static void
find_entries(file)332 find_entries(file)
333 char	*file;
334 {
335 	char *cp;
336 	struct stat st;
337 
338 	/* skip anything that isn't a regular file */
339 	if (stat(file, &st) == 0 && !S_ISREG(st.st_mode))
340 		return;
341 
342 	if ((inf = fopen(file, "r")) == NULL) {
343 		perror(file);
344 		infile_fail++;		/* Count bad opens. ID #1082298 */
345 		return;
346 	}
347 	curfile = savestr(file);
348 	lineno = 0;
349 #ifdef __STDC__
350 	cp = strrchr(file, '.');
351 #else
352 	cp = rindex(file, '.');
353 #endif
354 	/* .l implies lisp or lex source code */
355 	if (cp && cp[1] == 'l' && cp[2] == '\0') {
356 #ifdef __STDC__
357 		if (strchr(";([", first_char()) != NULL) 	/* lisp */
358 #else
359 		if (index(";([", first_char()) != NULL) 	/* lisp */
360 #endif
361 		{
362 			L_funcs(inf);
363 			(void) fclose(inf);
364 			return;
365 		} else {					/* lex */
366 			/*
367 			 * throw away all the code before the second "%%"
368 			 */
369 			toss_yysec();
370 			getaline(lineftell);
371 			pfnote("yylex", lineno, TRUE);
372 			toss_yysec();
373 			C_entries();
374 			(void) fclose(inf);
375 			return;
376 		}
377 	}
378 	/* .y implies a yacc file */
379 	if (cp && cp[1] == 'y' && cp[2] == '\0') {
380 		toss_yysec();
381 		Y_entries();
382 		C_entries();
383 		(void) fclose(inf);
384 		return;
385 	}
386 
387 	/*
388 	 * Add in file name extension support for Sun C++ which
389 	 * permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.)
390 	 */
391 
392 	/* if not a .c, .C, .cc, .cxx or .h file, try fortran */
393 	if (cp && (cp[1] != 'C' && cp[1] != 'c' && cp[1] != 'h') &&
394 	    cp[2] == '\0' && (strcmp(cp, ".cc") == 0) &&
395 	    (strcmp(cp, ".cxx") == 0)) {
396 		if (PF_funcs(inf) != 0) {
397 			(void) fclose(inf);
398 			return;
399 		}
400 		rewind(inf);	/* no fortran tags found, try C */
401 	}
402 	C_entries();
403 	(void) fclose(inf);
404 }
405 
406 static void
pfnote(name,ln,f)407 pfnote(name, ln, f)
408 char	*name;
409 int	ln;
410 bool	f;		/* f == TRUE when function */
411 {
412 	char *fp;
413 	NODE *np;
414 	char *nametk;	/* hold temporary tokens from name */
415 	char nbuf[BUFSIZ];
416 
417 	if ((np = malloc(sizeof (NODE))) == NULL) {
418 		(void) fprintf(stderr,
419 				gettext("ctags: too many entries to sort\n"));
420 		put_entries(head);
421 		free_tree(head);
422 		head = np = (NODE *) malloc(sizeof (NODE));
423 	}
424 	if (xflag == 0 && (strcmp(name, "main") == 0)) {
425 #ifdef __STDC__
426 		fp = strrchr(curfile, '/');
427 #else
428 		fp = rindex(curfile, '/');
429 #endif
430 		if (fp == 0)
431 			fp = curfile;
432 		else
433 			fp++;
434 		(void) sprintf(nbuf, "M%s", fp);
435 #ifdef __STDC__
436 		fp = strrchr(nbuf, '.');
437 #else
438 		fp = rindex(nbuf, '.');
439 #endif
440 		/* Chop off .cc and .cxx as well as .c, .h, etc		*/
441 		if (fp && ((fp[2] == 0) || (fp[2] == 'c' && fp[3] == 0) ||
442 			    (fp[3] == 'x' && fp[4] == 0)))
443 			*fp = 0;
444 		name = nbuf;
445 	}
446 
447 	/* remove in-between blanks operator function tags */
448 #ifdef __STDC__
449 	if (strchr(name, ' ') != NULL)
450 #else
451 	if (index(name, ' ') != NULL)
452 #endif
453 	{
454 		(void) strcpy(name, strtok(name, " "));
455 		while (nametk = strtok(0, " "))
456 			(void) strcat(name, nametk);
457 	}
458 	np->entry = savestr(name);
459 	np->file = curfile;
460 	np->f = f;
461 	np->lno = ln;
462 	np->left = np->right = 0;
463 	if (xflag == 0) {
464 		lbuf[50] = 0;
465 		(void) strcat(lbuf, "$");
466 		lbuf[50] = 0;
467 	}
468 	np->pat = savestr(lbuf);
469 	if (head == NULL)
470 		head = np;
471 	else
472 		add_node(np, head);
473 }
474 
475 /*
476  * This routine finds functions and typedefs in C syntax and adds them
477  * to the list.
478  */
479 static void
C_entries()480 C_entries()
481 {
482 	int c;
483 	char *token, *tp;
484 	bool incomm, inquote, inchar, midtoken, isoperator, optfound;
485 	int level;
486 	char *sp;
487 	char tok[BUFSIZ];
488 	long int tokftell;
489 
490 	number = gotone = midtoken = inquote = inchar =
491 	incomm = isoperator = optfound = FALSE;
492 
493 	level = 0;
494 	sp = tp = token = line;
495 	lineno++;
496 	lineftell = tokftell = ftell(inf);
497 	for (;;) {
498 		*sp = c = getc(inf);
499 		if (feof(inf))
500 			break;
501 		if (c == '\n') {
502 			lineftell = ftell(inf);
503 			lineno++;
504 		} else if (c == '\\') {
505 			c = *++sp = getc(inf);
506 			if ((c == '\n') || (c == EOF)) { /* c == EOF, 1091005 */
507 				lineftell = ftell(inf);
508 				lineno++;
509 				c = ' ';
510 			}
511 		} else if (incomm) {
512 			if (c == '*') {
513 				while ((*++sp = c = getc(inf)) == '*')
514 					continue;
515 
516 				/* c == EOF 1091005			*/
517 				if ((c == '\n') || (c == EOF)) {
518 					lineftell = ftell(inf);
519 					lineno++;
520 				}
521 
522 				if (c == '/')
523 					incomm = FALSE;
524 			}
525 		} else if (inquote) {
526 			/*
527 			 * Too dumb to know about \" not being magic, but
528 			 * they usually occur in pairs anyway.
529 			 */
530 			if (c == '"')
531 				inquote = FALSE;
532 			continue;
533 		} else if (inchar) {
534 			if (c == '\'')
535 				inchar = FALSE;
536 			continue;
537 		} else if (midtoken == TRUE) {	/* if white space omitted */
538 			goto dotoken;
539 		} else switch (c) {
540 		    case '"':
541 			inquote = TRUE;
542 			continue;
543 		    case '\'':
544 			inchar = TRUE;
545 			continue;
546 		    case '/':
547 			*++sp = c = getc(inf);
548 			/* Handles the C++ comment token "//" 		*/
549 			if (c == '*')
550 				incomm = TRUE;
551 			else if (c == '/') {
552 				/*
553 				 * Skip over all the characters after
554 				 * "//" until a newline character. Now also
555 				 * includes fix for 1091005, check for EOF.
556 				 */
557 				do  {
558 					c = getc(inf);
559 				/* 1091005:				*/
560 				} while ((c != '\n') && (c != EOF));
561 
562 
563 				/*
564 				 * Fixed bugid 1030014
565 				 * Return the current position of the
566 				 * file after the newline.
567 				 */
568 				lineftell = ftell(inf);
569 				lineno++;
570 				*--sp = c;
571 			}
572 			else
573 				(void) ungetc(*sp, inf);
574 			continue;
575 		    case '#':
576 			if (sp == line)
577 				number = TRUE;
578 			continue;
579 		    case '{':
580 			if ((tydef == begin_rec) || (tydef == begin_tag)) {
581 				tydef = middle;
582 			}
583 			level++;
584 			continue;
585 		    case '}':
586 			/*
587 			 * Heuristic for function or structure end;
588 			 * common for #ifdef/#else blocks to add extra "{"
589 			 */
590 			if (sp == line)
591 				level = 0;	/* reset */
592 			else
593 				level--;
594 			if (!level && tydef == middle) {
595 				tydef = end;
596 			}
597 			if (!level && tydef == none) /* Fix for #1034126 */
598 				goto dotoken;
599 			continue;
600 		}
601 
602 dotoken:
603 
604 
605 		if (!level && !inquote && !incomm && gotone == FALSE) {
606 			if (midtoken) {
607 				if (endtoken(c)) {
608 
609 				/*
610 				 *
611 				 *    ':'  +---> ':' -> midtok
612 				 *
613 				 *    +---> operator{+,-, etc} -> midtok
614 				 *		(continue)
615 				 *    +---> endtok
616 				 */
617 		/*
618 		 * Enhance operator function support and
619 		 *	fix bugid 1027806
620 		 *
621 		 *  For operator token, scanning will continue until
622 		 *  '(' is found.  Spaces between 'operater' and
623 		 *  'oprtk' are allowed (e.g. 'operator + ()'), but
624 		 *  will be removed when the actual entry for the tag
625 		 *  is made.
626 		 *  Note that functions of the form 'operator ()(int)'
627 		 *  will be recognized, but 'operator ()' will not,
628 		 *  even though this is legitimate in C.
629 		 */
630 
631 					if (optoken(c)) {
632 					    if (isoperator) {
633 					    if (optfound) {
634 						    if (c != '(') {
635 						    tp++;
636 						    goto next_char;
637 						    }
638 					    } else {
639 						    if (c != ' ') {
640 						    optfound = TRUE;
641 						    }
642 						    tp++;
643 						    goto next_char;
644 					    }
645 					    } else {
646 				/* start: this code shifted left for cstyle */
647 				char *backptr = tp - 7;
648 				if (strncmp(backptr, "operator", 8) == 0) {
649 					/* This is an overloaded operator */
650 					isoperator = TRUE;
651 					if (c != ' ') {
652 						optfound = TRUE;
653 					}
654 
655 					tp++;
656 					goto next_char;
657 				} else if (c == '~') {
658 					/* This is a destructor		*/
659 					tp++;
660 					goto next_char;
661 				}
662 				/* end: above code shifted left for cstyle */
663 					}
664 					} else if (c == ':') {
665 					    if ((*++sp = getc(inf)) == ':') {
666 						tp += 2;
667 						c = *sp;
668 						goto next_char;
669 					    } else {
670 						(void) ungetc (*sp, inf);
671 						--sp;
672 					    }
673 					}
674 
675 				/* start: this code shifted left for cstyle */
676 				{
677 				int f;
678 				int pfline = lineno;
679 
680 				if (start_entry(&sp, token, &f)) {
681 					(void) strncpy(tok, token, tp-token+1);
682 					tok[tp-token+1] = 0;
683 					getaline(tokftell);
684 					pfnote(tok, pfline, f);
685 					gotone = f;	/* function */
686 				}
687 
688 				isoperator = optfound = midtoken = FALSE;
689 				token = sp;
690 				}
691 				/* end: above code shifted left for cstyle */
692 				} else if (intoken(c))
693 					tp++;
694 			} else if (begtoken(c)) {
695 				token = tp = sp;
696 				midtoken = TRUE;
697 				tokftell = lineftell;
698 			}
699 		}
700 	next_char:
701 		if (c == ';' && tydef == end)	/* clean with typedefs */
702 			tydef = none;
703 		sp++;
704 			/* The "c == }" was added to fix #1034126 */
705 		if (c == '\n' ||c == '}'|| sp > &line[sizeof (line) - BUFSIZ]) {
706 			tp = token = sp = line;
707 			number = gotone = midtoken = inquote =
708 			inchar = isoperator = optfound = FALSE;
709 		}
710 	}
711 }
712 
713 /*
714  * This routine  checks to see if the current token is
715  * at the start of a function, or corresponds to a typedef
716  * It updates the input line * so that the '(' will be
717  * in it when it returns.
718  */
719 static int
start_entry(lp,token,f)720 start_entry(lp, token, f)
721 char	**lp, *token;
722 int	*f;
723 {
724 	char	*sp;
725 	int	c;
726 	static	bool	found;
727 	bool	firsttok;	/* T if have seen first token in ()'s	*/
728 	int	bad;
729 
730 	*f = 1;			/* a function */
731 	sp = *lp;
732 	c = *sp;
733 	bad = FALSE;
734 	if (!number) {		/* space is not allowed in macro defs	*/
735 		while (iswhite(c)) {
736 			*++sp = c = getc(inf);
737 			if ((c == '\n') || (c == EOF)) { /* c==EOF, #1091005 */
738 				lineno++;
739 				lineftell = ftell(inf);
740 				if (sp > &line[sizeof (line) - BUFSIZ])
741 					goto ret;
742 			}
743 		}
744 	/* the following tries to make it so that a #define	a b(c)	*/
745 	/* doesn't count as a define of b.				*/
746 	} else {
747 		if (strncmp(token, "define", 6) == 0)
748 			found = 0;
749 		else
750 			found++;
751 		if (found >= 2) {
752 			gotone = TRUE;
753 badone:			bad = TRUE;
754 			goto ret;
755 		}
756 	}
757 	/* check for the typedef cases		*/
758 #ifdef XPG4
759 	if (strncmp(token, "typedef", 7) == 0) {
760 #else /*  !XPG4 */
761 	if (tflag && (strncmp(token, "typedef", 7) == 0)) {
762 #endif /*  XPG4 */
763 		tydef = begin;
764 		goto badone;
765 	}
766 	/* Handles 'class' besides 'struct' etc.			*/
767 	if (tydef == begin && ((strncmp(token, "struct", 6) == 0) ||
768 			    (strncmp(token, "class", 5) == 0) ||
769 			    (strncmp(token, "union", 5) == 0)||
770 			    (strncmp(token, "enum", 4) == 0))) {
771 		tydef = begin_rec;
772 		goto badone;
773 	}
774 	if (tydef == begin) {
775 		tydef = end;
776 		goto badone;
777 	}
778 	if (tydef == begin_rec) {
779 		tydef = begin_tag;
780 		goto badone;
781 	}
782 	if (tydef == begin_tag) {
783 		tydef = end;
784 		goto gottydef;	/* Fall through to "tydef==end" */
785 	}
786 
787 gottydef:
788 	if (tydef == end) {
789 		*f = 0;
790 		goto ret;
791 	}
792 	if (c != '(')
793 		goto badone;
794 	firsttok = FALSE;
795 	while ((*++sp = c = getc(inf)) != ')') {
796 		if ((c == '\n') || (c == EOF)) { /* c == EOF Fix for #1091005 */
797 			lineftell = ftell(inf);
798 			lineno++;
799 			if (sp > &line[sizeof (line) - BUFSIZ])
800 				goto ret;
801 		}
802 		/*
803 		 * This line used to confuse ctags:
804 		 *	int	(*oldhup)();
805 		 * This fixes it. A nonwhite char before the first
806 		 * token, other than a / (in case of a comment in there)
807 		 * makes this not a declaration.
808 		 */
809 		if (begtoken(c) || c == '/')
810 			firsttok = TRUE;
811 		else if (!iswhite(c) && !firsttok)
812 			goto badone;
813 	}
814 	while (iswhite(*++sp = c = getc(inf)))
815 		if ((c == '\n') || (c == EOF)) { /* c == EOF fix for #1091005 */
816 			lineno++;
817 			lineftell = ftell(inf);
818 			if (sp > &line[sizeof (line) - BUFSIZ])
819 				break;
820 		}
821 ret:
822 	*lp = --sp;
823 	if (c == '\n')
824 		lineno--;
825 	(void) ungetc(c, inf);
826 	return (!bad && (!*f || isgood(c)));
827 					/* hack for typedefs */
828 }
829 
830 /*
831  * Y_entries:
832  *	Find the yacc tags and put them in.
833  */
834 static void
Y_entries()835 Y_entries()
836 {
837 	char	*sp, *orig_sp;
838 	int	brace;
839 	bool	in_rule, toklen;
840 	char		tok[BUFSIZ];
841 
842 	brace = 0;
843 	getaline(lineftell);
844 	pfnote("yyparse", lineno, TRUE);
845 	while (fgets(line, sizeof (line), inf) != NULL)
846 		for (sp = line; *sp; sp++)
847 			switch (*sp) {
848 			    case '\n':
849 				lineno++;
850 				/* FALLTHROUGH */
851 			    case ' ':
852 			    case '\t':
853 			    case '\f':
854 			    case '\r':
855 				break;
856 			    case '"':
857 				do {
858 					while (*++sp != '"')
859 						continue;
860 				} while (sp[-1] == '\\');
861 				break;
862 			    case '\'':
863 				do {
864 					while (*++sp != '\'')
865 						continue;
866 				} while (sp[-1] == '\\');
867 				break;
868 			    case '/':
869 				if (*++sp == '*')
870 					sp = toss_comment(sp);
871 				else
872 					--sp;
873 				break;
874 			    case '{':
875 				brace++;
876 				break;
877 			    case '}':
878 				brace--;
879 				break;
880 			    case '%':
881 				if (sp[1] == '%' && sp == line)
882 					return;
883 				break;
884 			    case '|':
885 			    case ';':
886 				in_rule = FALSE;
887 				break;
888 			    default:
889 				if (brace == 0 && !in_rule && (isalpha(*sp) ||
890 								*sp == '.' ||
891 								*sp == '_')) {
892 					orig_sp = sp;
893 					++sp;
894 					while (isalnum(*sp) || *sp == '_' ||
895 						*sp == '.')
896 						sp++;
897 					toklen = sp - orig_sp;
898 					while (isspace(*sp))
899 						sp++;
900 					if (*sp == ':' || (*sp == '\0' &&
901 						    first_char() == ':')) {
902 						(void) strncpy(tok,
903 							orig_sp, toklen);
904 						tok[toklen] = '\0';
905 						(void) strcpy(lbuf, line);
906 						lbuf[strlen(lbuf) - 1] = '\0';
907 						pfnote(tok, lineno, TRUE);
908 						in_rule = TRUE;
909 					}
910 					else
911 						sp--;
912 				}
913 				break;
914 			}
915 }
916 
917 static char *
toss_comment(start)918 toss_comment(start)
919 char	*start;
920 {
921 	char	*sp;
922 
923 	/*
924 	 * first, see if the end-of-comment is on the same line
925 	 */
926 	do {
927 #ifdef __STDC__
928 		while ((sp = strchr(start, '*')) != NULL)
929 #else
930 		while ((sp = index(start, '*')) != NULL)
931 #endif
932 			if (sp[1] == '/')
933 				return (++sp);
934 			else
935 				start = (++sp);
936 		start = line;
937 		lineno++;
938 	} while (fgets(line, sizeof (line), inf) != NULL);
939 
940 	/*
941 	 * running this through lint revealed that the original version
942 	 * of this routine didn't explicitly return something; while
943 	 * the return value was always used!. so i've added this
944 	 * next line.
945 	 */
946 	return (sp);
947 }
948 
949 static void
getaline(where)950 getaline(where)
951 long int where;
952 {
953 	long saveftell = ftell(inf);
954 	char *cp;
955 
956 	(void) fseek(inf, where, 0);
957 	(void) fgets(lbuf, sizeof (lbuf), inf);
958 #ifdef __STDC__
959 	cp = strrchr(lbuf, '\n');
960 #else
961 	cp = rindex(lbuf, '\n');
962 #endif
963 	if (cp)
964 		*cp = 0;
965 	(void) fseek(inf, saveftell, 0);
966 }
967 
968 static void
free_tree(node)969 free_tree(node)
970 NODE	*node;
971 {
972 	while (node) {
973 		free_tree(node->right);
974 		free(node);
975 		node = node->left;
976 	}
977 }
978 
979 static void
add_node(node,cur_node)980 add_node(node, cur_node)
981 NODE *node, *cur_node;
982 {
983 	int dif;
984 
985 	dif = strcmp(node->entry, cur_node->entry);
986 	if (dif == 0) {
987 		if (node->file == cur_node->file) {
988 			if (!wflag) {
989 			(void) fprintf(stderr,
990 			gettext("Duplicate entry in file %s, line %d: %s\n"),
991 			node->file, lineno, node->entry);
992 			(void) fprintf(stderr,
993 					gettext("Second entry ignored\n"));
994 			}
995 			return;
996 		}
997 		if (!cur_node->been_warned)
998 			if (!wflag) {
999 				(void) fprintf(stderr, gettext("Duplicate "
1000 					    "entry in files %s and %s: %s "
1001 					    "(Warning only)\n"),
1002 					    node->file, cur_node->file,
1003 					    node->entry);
1004 			}
1005 		cur_node->been_warned = TRUE;
1006 		return;
1007 	}
1008 
1009 	if (dif < 0) {
1010 		if (cur_node->left != NULL)
1011 			add_node(node, cur_node->left);
1012 		else
1013 			cur_node->left = node;
1014 		return;
1015 	}
1016 	if (cur_node->right != NULL)
1017 		add_node(node, cur_node->right);
1018 	else
1019 		cur_node->right = node;
1020 }
1021 
1022 static void
put_entries(node)1023 put_entries(node)
1024 NODE	*node;
1025 {
1026 	char	*sp;
1027 
1028 	if (node == NULL)
1029 		return;
1030 	put_entries(node->left);
1031 
1032 	/*
1033 	 * while the code in the following #ifdef section could be combined,
1034 	 * it's explicitly separated here to make maintainance easier.
1035 	 */
1036 #ifdef XPG4
1037 	/*
1038 	 * POSIX 2003: we no longer have a "-t" flag; the logic is
1039 	 * automatically assumed to be "turned on" here.
1040 	 */
1041 	if (xflag == 0) {
1042 			(void) fprintf(outf, "%s\t%s\t%c^",
1043 				node->entry, node->file, searchar);
1044 			for (sp = node->pat; *sp; sp++)
1045 				if (*sp == '\\')
1046 					(void) fprintf(outf, "\\\\");
1047 				else if (*sp == searchar)
1048 					(void) fprintf(outf, "\\%c", searchar);
1049 				else
1050 					(void) putc(*sp, outf);
1051 			(void) fprintf(outf, "%c\n", searchar);
1052 	} else if (vflag)
1053 		(void) fprintf(stdout, "%s %s %d\n",
1054 				node->entry, node->file, (node->lno+63)/64);
1055 	else
1056 		(void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1057 			node->entry, node->lno, node->file, node->pat);
1058 #else /* XPG4 */
1059 	/*
1060 	 * original way of doing things. "-t" logic is only turned on
1061 	 * when the user has specified it via a command-line argument.
1062 	 */
1063 	if (xflag == 0)
1064 		if (node->f) {		/* a function */
1065 			(void) fprintf(outf, "%s\t%s\t%c^",
1066 				node->entry, node->file, searchar);
1067 			for (sp = node->pat; *sp; sp++)
1068 				if (*sp == '\\')
1069 					(void) fprintf(outf, "\\\\");
1070 				else if (*sp == searchar)
1071 					(void) fprintf(outf, "\\%c", searchar);
1072 				else
1073 					(void) putc(*sp, outf);
1074 			(void) fprintf(outf, "%c\n", searchar);
1075 		} else {		/* a typedef; text pattern inadequate */
1076 			(void) fprintf(outf, "%s\t%s\t%d\n",
1077 				node->entry, node->file, node->lno);
1078 		} else if (vflag)
1079 		(void) fprintf(stdout, "%s %s %d\n",
1080 				node->entry, node->file, (node->lno+63)/64);
1081 	else
1082 		(void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1083 			node->entry, node->lno, node->file, node->pat);
1084 #endif /* XPG4 */
1085 	put_entries(node->right);
1086 }
1087 
1088 
1089 static int
PF_funcs(fi)1090 PF_funcs(fi)
1091 FILE *fi;
1092 {
1093 
1094 	pfcnt = 0;
1095 	while (fgets(lbuf, sizeof (lbuf), fi)) {
1096 		lineno++;
1097 		dbp = lbuf;
1098 		if (*dbp == '%') dbp++;	/* Ratfor escape to fortran */
1099 		while (isspace(*dbp))
1100 			dbp++;
1101 		if (*dbp == 0)
1102 			continue;
1103 		switch (*dbp |' ') {
1104 
1105 		    case 'i':
1106 			if (tail("integer"))
1107 				takeprec();
1108 			break;
1109 		    case 'r':
1110 			if (tail("real"))
1111 				takeprec();
1112 			break;
1113 		    case 'l':
1114 			if (tail("logical"))
1115 				takeprec();
1116 			break;
1117 		    case 'c':
1118 			if (tail("complex") || tail("character"))
1119 				takeprec();
1120 			break;
1121 		    case 'd':
1122 			if (tail("double")) {
1123 				while (isspace(*dbp))
1124 					dbp++;
1125 				if (*dbp == 0)
1126 					continue;
1127 				if (tail("precision"))
1128 					break;
1129 				continue;
1130 			}
1131 			break;
1132 		}
1133 		while (isspace(*dbp))
1134 			dbp++;
1135 		if (*dbp == 0)
1136 			continue;
1137 		switch (*dbp|' ') {
1138 
1139 		    case 'f':
1140 			if (tail("function"))
1141 				getit();
1142 			continue;
1143 		    case 's':
1144 			if (tail("subroutine"))
1145 				getit();
1146 			continue;
1147 		    case 'p':
1148 			if (tail("program")) {
1149 				getit();
1150 				continue;
1151 			}
1152 			if (tail("procedure"))
1153 				getit();
1154 			continue;
1155 		}
1156 	}
1157 	return (pfcnt);
1158 }
1159 
1160 static int
tail(cp)1161 tail(cp)
1162 char *cp;
1163 {
1164 	int len = 0;
1165 
1166 	while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
1167 		cp++, len++;
1168 	if (*cp == 0) {
1169 		dbp += len;
1170 		return (1);
1171 	}
1172 	return (0);
1173 }
1174 
1175 static void
takeprec()1176 takeprec()
1177 {
1178 
1179 	while (isspace(*dbp))
1180 		dbp++;
1181 	if (*dbp != '*')
1182 		return;
1183 	dbp++;
1184 	while (isspace(*dbp))
1185 		dbp++;
1186 	if (!isdigit(*dbp)) {
1187 		--dbp;		/* force failure */
1188 		return;
1189 	}
1190 	do
1191 		dbp++;
1192 	while (isdigit(*dbp));
1193 }
1194 
1195 static void
getit()1196 getit()
1197 {
1198 	char *cp;
1199 	char c;
1200 	char nambuf[BUFSIZ];
1201 
1202 	for (cp = lbuf; *cp; cp++)
1203 		;
1204 	*--cp = 0;	/* zap newline */
1205 	while (isspace(*dbp))
1206 		dbp++;
1207 	if (*dbp == 0 || !isalpha(*dbp) || !isascii(*dbp))
1208 		return;
1209 	for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
1210 		continue;
1211 	c = cp[0];
1212 	cp[0] = 0;
1213 	(void) strcpy(nambuf, dbp);
1214 	cp[0] = c;
1215 	pfnote(nambuf, lineno, TRUE);
1216 	pfcnt++;
1217 }
1218 
/*
 * savestr:
 *	Return a freshly malloc()ed copy of the NUL-terminated string cp.
 *	The caller owns the returned storage.
 *
 *	If the allocation fails, a message is printed with perror() and
 *	the program exits with status 1 (the same policy parseargs() uses)
 *	instead of copying through a NULL pointer.
 */
static char *
savestr(cp)
char *cp;
{
	size_t size;
	char *dp;

	size = strlen(cp) + 1;		/* include the terminating NUL */
	dp = malloc(size);
	if (dp == NULL) {
		perror("Can't malloc string space");
		exit(1);
	}
	(void) memcpy(dp, cp, size);

	return (dp);
}
1232 
1233 #ifndef __STDC__
/*
 * rindex:
 *	Find the last occurrence of the character c in the string sp.
 *	The terminating NUL takes part in the search, so looking for
 *	'\0' yields a pointer to the end of the string.  Returns a
 *	pointer to the occurrence, or NULL if c does not appear.
 *
 * Identical to v7 rindex, included for portability.
 */

static char *
rindex(sp, c)
char *sp, c;
{
	char *last = NULL;

	for (;;) {
		if (*sp == c)
			last = sp;
		if (*sp == '\0')
			break;
		sp++;
	}
	return (last);
}
1254 #endif
1255 
1256 /*
1257  * lisp tag functions
1258  * just look for (def or (DEF
1259  */
1260 
1261 static void
L_funcs(fi)1262 L_funcs(fi)
1263 FILE *fi;
1264 {
1265 	int	special;
1266 
1267 	pfcnt = 0;
1268 	while (fgets(lbuf, sizeof (lbuf), fi)) {
1269 		lineno++;
1270 		dbp = lbuf;
1271 		if (dbp[0] == '(' &&
1272 		    (dbp[1] == 'D' || dbp[1] == 'd') &&
1273 		    (dbp[2] == 'E' || dbp[2] == 'e') &&
1274 		    (dbp[3] == 'F' || dbp[3] == 'f')) {
1275 			dbp += 4;
1276 			if (striccmp(dbp, "method") == 0 ||
1277 			    striccmp(dbp, "wrapper") == 0 ||
1278 			    striccmp(dbp, "whopper") == 0)
1279 				special = TRUE;
1280 			else
1281 				special = FALSE;
1282 			while (!isspace(*dbp))
1283 				dbp++;
1284 			while (isspace(*dbp))
1285 				dbp++;
1286 			L_getit(special);
1287 		}
1288 	}
1289 }
1290 
1291 static void
L_getit(special)1292 L_getit(special)
1293 int	special;
1294 {
1295 	char	*cp;
1296 	char	c;
1297 	char		nambuf[BUFSIZ];
1298 
1299 	for (cp = lbuf; *cp; cp++)
1300 		continue;
1301 	*--cp = 0;		/* zap newline */
1302 	if (*dbp == 0)
1303 		return;
1304 	if (special) {
1305 #ifdef __STDC__
1306 		if ((cp = strchr(dbp, ')')) == NULL)
1307 #else
1308 		if ((cp = index(dbp, ')')) == NULL)
1309 #endif
1310 			return;
1311 		while (cp >= dbp && *cp != ':')
1312 			cp--;
1313 		if (cp < dbp)
1314 			return;
1315 		dbp = cp;
1316 		while (*cp && *cp != ')' && *cp != ' ')
1317 			cp++;
1318 	}
1319 	else
1320 		for (cp = dbp + 1; *cp && *cp != '(' && *cp != ' '; cp++)
1321 			continue;
1322 	c = cp[0];
1323 	cp[0] = 0;
1324 	(void) strcpy(nambuf, dbp);
1325 	cp[0] = c;
1326 	pfnote(nambuf, lineno, TRUE);
1327 	pfcnt++;
1328 }
1329 
/*
 * striccmp:
 *	Compare two strings over the length of the second, ignoring
 *	case distinctions in the first.  If they are the same, return 0.
 *	If they are different, return the difference of the first two
 *	different characters.  It is assumed that the pattern (second
 *	string) is completely lower case.
 *
 *	A str shorter than pat compares unequal: its terminating NUL
 *	differs from the pattern character at that position.
 *
 *	The character is converted to unsigned char before it is handed
 *	to isupper()/tolower(), whose behavior is undefined for negative
 *	values other than EOF.
 */
static int
striccmp(str, pat)
char	*str, *pat;
{
	int		c1;
	unsigned char	uc;

	for (; *pat != '\0'; pat++, str++) {
		uc = (unsigned char)*str;
		if (isupper(uc))
			c1 = tolower(uc);
		else
			c1 = *str;
		if (c1 != *pat)
			return (c1 - *pat);
	}
	return (0);
}
1356 
1357 /*
1358  * first_char:
1359  *	Return the first non-blank character in the file.  After
1360  *	finding it, rewind the input file so we start at the beginning
1361  *	again.
1362  */
1363 static int
first_char()1364 first_char()
1365 {
1366 	int	c;
1367 	long	off;
1368 
1369 	off = ftell(inf);
1370 	while ((c = getc(inf)) != EOF)
1371 		if (!isspace(c) && c != '\r') {
1372 			(void) fseek(inf, off, 0);
1373 			return (c);
1374 		}
1375 	(void) fseek(inf, off, 0);
1376 	return (EOF);
1377 }
1378 
1379 /*
1380  * toss_yysec:
1381  *	Toss away code until the next "%%" line.
1382  */
1383 static void
toss_yysec()1384 toss_yysec()
1385 {
1386 	char		buf[BUFSIZ];
1387 
1388 	for (;;) {
1389 		lineftell = ftell(inf);
1390 		if (fgets(buf, BUFSIZ, inf) == NULL)
1391 			return;
1392 		lineno++;
1393 		if (strncmp(buf, "%%", 2) == 0)
1394 			return;
1395 	}
1396 }
1397 
/*
 * Usage:
 *	Print the command synopsis on stderr and exit with status 1.
 *	The XPG4 build does not advertise the -t flag.
 */
static void
Usage()
{
#ifdef XPG4
	(void) fprintf(stderr,
	    gettext("Usage:\tctags [-aBFuvw] [-f tagsfile] file ...\n"));
#else /*  !XPG4 */
	(void) fprintf(stderr,
	    gettext("Usage:\tctags [-aBFtuvw] [-f tagsfile] file ...\n"));
#endif /*  XPG4 */
	(void) fprintf(stderr, gettext("OR:\tctags [-x] file ...\n"));
	exit(1);
}
1410 
1411 
1412 /*
1413  * parseargs():		modify the args
1414  *	the purpose of this routine is to transform any ancient argument
1415  *	usage into a format which is acceptable to getopt(3C), so that we
1416  *	retain backwards Solaris 2.[0-4] compatibility.
1417  *
1418  *	This routine allows us to make full use of getopts, without any
1419  *	funny argument processing in main().
1420  *
1421  *	The other alternative would be to hand-craft the processed arguments
1422  *	during and after getopt(3C) - which usually leads to uglier code
1423  *	in main(). I've opted to keep the ugliness isolated down here,
1424  *	instead of in main().
1425  *
1426  *	In a nutshell, if the user has used the old Solaris syntax of:
1427  *		ctags [-aBFtuvwx] [-f tagsfile] filename ...
1428  *	We simply change this into:
1429  *		ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file...
1430  *
1431  *	If the user has specified the new getopt(3C) syntax, we merely
1432  *	copy that into our modified argument space.
1433  */
1434 static void
parseargs(ac,av)1435 parseargs(ac, av)
1436 int ac;				/* argument count			*/
1437 char **av;			/* ptr to original argument space	*/
1438 {
1439 	int i;			/* current argument			*/
1440 	int a;			/* used to parse combined arguments	*/
1441 	int fflag;		/* 1 = we're only parsing filenames	*/
1442 	size_t sz;		/* size of the argument			*/
1443 	size_t mav_sz;		/* size of our psuedo argument space	*/
1444 
1445 	i = mac = fflag = 0;	/* proper initializations */
1446 
1447 	mav_sz = ((ac + 1) * sizeof (char *));
1448 	if ((mav = malloc(mav_sz)) == (char **)NULL) {
1449 		perror("Can't malloc argument space");
1450 		exit(1);
1451 	}
1452 
1453 	/* for each argument, see if we need to change things:		*/
1454 	for (; (av[i] != (char *)NULL) && (av[i][0] != (char)NULL); i++) {
1455 
1456 		if (strcmp(av[i], "--") == 0) {
1457 			fflag = 1;	/* just handle filenames now	*/
1458 		}
1459 
1460 		sz = strlen(&av[i][0]);	/* get this arg's size		*/
1461 
1462 		/*
1463 		 * if the argument starts with a "-", and has more than
1464 		 * 1 flag, then we have to search through each character,
1465 		 * and separate any flags which have been combined.
1466 		 *
1467 		 * so, if we've found a "-" string which needs separating:
1468 		 */
1469 		if (fflag == 0 && 	/* not handling filename args	*/
1470 		    av[i][0] == '-' &&	/* and this is a flag		*/
1471 		    sz > 2) {		/* and there's more than 1 flag	*/
1472 			/* then for each flag after the "-" sign:	*/
1473 			for (a = 1; av[i][a]; a++) {
1474 				/* copy the flag into mav space.	*/
1475 				if (a > 1) {
1476 					/*
1477 					 * we need to call realloc() after the
1478 					 * 1st combined flag, because "ac"
1479 					 * doesn't include combined args.
1480 					 */
1481 					mav_sz += sizeof (char *);
1482 					if ((mav = realloc(mav, mav_sz)) ==
1483 					    (char **)NULL) {
1484 						perror("Can't realloc "
1485 							"argument space");
1486 						exit(1);
1487 					}
1488 				}
1489 
1490 				if ((mav[mac] = malloc((size_t)CPFLAG)) ==
1491 				    (char *)NULL) {
1492 					perror("Can't malloc argument space");
1493 					exit(1);
1494 				}
1495 				(void) sprintf(mav[mac], "-%c", av[i][a]);
1496 				++mac;
1497 			}
1498 		} else {
1499 			/* otherwise, just copy the argument:		*/
1500 			if ((mav[mac] = malloc(sz + 1)) == (char *)NULL) {
1501 				perror("Can't malloc argument space");
1502 				exit(1);
1503 			}
1504 			(void) strcpy(mav[mac], av[i]);
1505 			++mac;
1506 		}
1507 	}
1508 
1509 	mav[mac] = (char *)NULL;
1510 }
1511