1 /*
2 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /* Copyright (c) 1988 AT&T */
6 /* All Rights Reserved */
7
8 /*
9 * Copyright (c) 1980 Regents of the University of California.
10 * All rights reserved. The Berkeley software License Agreement
11 * specifies the terms and conditions for redistribution.
12 */
13
14 /*
15 * Modify ctags to handle C++ in C_entries(), etc:
16 * - Handles C++ comment token "//"
17 * - Handles C++ scope operator "::".
18 * This helps to distinguish between xyz()
19 * definition and X::xyz() definition.
20 * - Recognizes C++ reserved word "class" in typedef processing
21 * (for "-t" option)
22 * - Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx.
23 * - Handles overloaded unary/binary operator names
24 * Doesn't handle yet:
25 * - inline functions in class definition (currently they get
26 * swallowed within a class definition)
27 * - Tags with scope operator :: with spaces in between,
28 * e.g. classz ::afunc
29 *
 * Enhance operator functions support:
 * - The control flow for scanning operator tokens is now
 * consistent with that for other function tokens; the original
 * hacking method for 2.0 is removed. This accurately
 * identifies tags for declarations of the form 'operator+()'
 * (bugid 1027806) and also allows spaces between
 * 'operator' and 'oprtk', e.g. 'operator + ()'.
37 *
38 */
39
40 #ifndef lint
41 char copyright[] = "@(#) Copyright (c) 1980 Regents of the University of "
42 "California.\nAll rights reserved.\n";
43 #endif
44
45 #include <stdio.h>
46 #include <ctype.h>
47 #include <locale.h>
48 #include <unistd.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <limits.h>
52 #include <sys/types.h>
53 #include <sys/stat.h>
54
55 /*
56 * ctags: create a tags file
57 */
58
#define	bool	char

#define	TRUE	(1)
#define	FALSE	(0)

#define	CPFLAG	3			/* # of bytes in a flag */

/*
 * Map a character value onto a valid slot of the 0177-entry character
 * class tables (_wht, _btk, _itk, _etk, _gd, _opr).
 *
 * The scanners pass whatever getc() returned, which may be EOF or a byte
 * at or above 0177; used directly as a subscript those values read outside
 * the tables.  They are folded onto slot 0 (NUL) instead.  init() only
 * marks slots for characters that occur in its class strings, and a string
 * cannot contain NUL, so slot 0 always holds the defaults: not white, not a
 * token character, not an operator, and acceptable after ')'.
 *
 * This is a function rather than a macro so that an argument with side
 * effects (e.g. "*++sp = c = getc(inf)") is evaluated exactly once.
 */
static int
class_slot(int c)
{
	return ((c >= 0 && c < 0177) ? c : 0);
}

#define	iswhite(arg)	(_wht[class_slot(arg)])	/* T if char is white */
#define	begtoken(arg)	(_btk[class_slot(arg)])	/* T if char can start token */
#define	intoken(arg)	(_itk[class_slot(arg)])	/* T if char can be in token */
#define	endtoken(arg)	(_etk[class_slot(arg)])	/* T if char ends tokens */
#define	isgood(arg)	(_gd[class_slot(arg)])	/* T if char can be after ')' */

#define	optoken(arg)	(_opr[class_slot(arg)])	/* T if char can be */
					/* an overloaded operator token */

/* arguments are parenthesized so that any expression may be passed */
#define	max(I1, I2)	((I1) > (I2) ? (I1) : (I2))
76
/*
 * One collected tag.  Nodes are created by pfnote() and linked by
 * add_node() into a binary tree rooted at "head", ordered by strcmp() on
 * "entry"; put_entries() walks the tree in order to emit the tags.
 */
struct	nd_st {			/* sorting structure */
	char	*entry;			/* function or type name */
	char	*file;			/* file name */
	bool	f;			/* use pattern or line no */
	int	lno;			/* for -x option */
	char	*pat;			/* search pattern */
	bool	been_warned;		/* set if noticed dup */
	struct	nd_st	*left, *right;	/* left and right sons */
};

long	ftell();		/* old-style declaration, no prototype */
typedef	struct	nd_st	NODE;
89
static bool
	number,				/* T if on line starting with # */
	gotone,				/* found a func already on line */
					/* boolean "func" (see init) */
	_wht[0177], _etk[0177], _itk[0177], _btk[0177], _gd[0177];

/* boolean array for overloadable operator symbols */
static bool	_opr[0177];

/*
 * typedefs are recognized using a simple finite automaton;
 * tydef is its state variable.  start_entry() moves it from none to
 * begin on "typedef", to begin_rec on struct/class/union/enum and to
 * begin_tag on the token after that; C_entries() moves it to middle on
 * '{' and to end on the matching '}'.  A token seen in state end is
 * reported as the typedef name, and ';' returns the state to none.
 */
typedef enum {none, begin, begin_rec, begin_tag, middle, end } TYST;

static TYST tydef = none;

static char	searchar = '/';		/* use /.../ searches */

static int	lineno;			/* line number of current line */
static char
	line[4*BUFSIZ],		/* current input line */
	*curfile,		/* current input file name */
	*outfile = "tags",	/* output file */
	*white	= " \f\t\n",	/* white chars */
	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
				/* token ending chars */
	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
				/* token starting chars */
	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
		    "0123456789",
				/* valid in-token chars */
	*notgd	= ",;";		/* non-valid after-function chars */

static char	*oprtk	= " =-+%*/&|^~!<>[]()";	/* overloadable operators */

static int	file_num;	/* current file number */
static int	aflag;		/* -a: append to tags */

#ifndef XPG4			/* XPG4: handle typedefs by default */
static int	tflag;		/* -t: create tags for typedefs */
#endif /*  !XPG4 */

static int	uflag;		/* -u: update tags */
static int	wflag;		/* -w: suppress warnings */
static int	vflag;		/* -v: create vgrind style index output */
static int	xflag;		/* -x: create cxref style output */

/*
 * Text of the source line a tag was found on.  getaline(), PF_funcs(),
 * L_funcs() and Y_entries() fill it; pfnote() saves a copy as the tag's
 * search pattern.
 */
static char	lbuf[LINE_MAX];

static FILE
	*inf,			/* ioptr for current input file */
	*outf;			/* ioptr for tags file */

static long	lineftell;	/* ftell after getc( inf ) == '\n' */

static NODE	*head;		/* the head of the sorted binary tree */

#ifdef __STDC__
char	*strrchr(), *strchr();
#else
char	*rindex(), *index();
#endif

static int	infile_fail;	/* Count of bad opens. Fix bug ID #1082298 */

static char	*dbp = lbuf;	/* scan position in lbuf (Fortran/lisp) */
static int	pfcnt;		/* tags found by PF_funcs() or L_funcs() */

static int	mac;		/* our modified argc, after parseargs() */
static char	**mav;		/* our modified argv, after parseargs() */
161
162
/*
 * our local functions:
 * (those declared with empty parentheses carry no prototype, so their
 * arguments are not checked at the call sites)
 */
static void	init();
static void	find_entries(char *file);
static void	pfnote();
static void	C_entries();
static int	start_entry(char **lp, char *token, int *f);
static void	Y_entries();
static char	*toss_comment(char *start);
static void	getaline(long int where);
static void	free_tree(NODE *node);
static void	add_node(NODE *node, NODE *cur_node);
static void	put_entries(NODE *node);
static int	PF_funcs(FILE *fi);
static int	tail(char *cp);
static void	takeprec();
static void	getit();
static char	*savestr(char *cp);
static void	L_funcs(FILE *fi);
static void	L_getit(int special);
static int	striccmp(char *str, char *pat);
static int	first_char();
static void	toss_yysec();
static void	Usage();
static void	parseargs(int ac, char **av);
187
188 int
main(int ac,char * av[])189 main(int ac, char *av[])
190 {
191 int i;
192 char cmd[100];
193
194 (void) setlocale(LC_ALL, "");
195 #if !defined(TEXT_DOMAIN)
196 #define TEXT_DOMAIN "SYS_TEST"
197 #endif
198 (void) textdomain(TEXT_DOMAIN);
199
200 parseargs(ac, av);
201
202 while ((i = getopt(mac, mav, "aBFtuvwxf:")) != EOF) {
203 switch (i) {
204 case 'a': /* -a: Append output to existing tags file */
205 aflag++;
206 break;
207
208 case 'B': /* -B: Use backward search patterns (?...?) */
209 searchar = '?';
210 break;
211
212 case 'F': /* -F: Use forward search patterns (/.../) */
213 searchar = '/';
214 break;
215
216 case 't': /* -t: Create tags for typedefs. */
217 /* for XPG4 , we silently ignore "-t". */
218 #ifndef XPG4
219 tflag++;
220 #endif /* !XPG4 */
221 break;
222
223 case 'u': /* -u: Update the specified tags file */
224 uflag++;
225 break;
226
227 case 'v': /* -v: Index listing on stdout */
228 vflag++;
229 xflag++;
230 break;
231
232 case 'w': /* -w: Suppress warnings */
233 wflag++;
234 break;
235
236 case 'x': /* -x: Produce a simple index */
237 xflag++;
238 break;
239
240 case 'f': /* -f tagsfile: output to tagsfile */
241 outfile = strdup(optarg);
242 break;
243
244 default:
245 Usage(); /* never returns */
246 break;
247 }
248 }
249
250 /* if we didn't specify any source code to parse, complain and die. */
251 if (optind == mac) {
252 Usage(); /* never returns */
253 }
254
255
256 init(); /* set up boolean "functions" */
257 /*
258 * loop through files finding functions
259 */
260 for (file_num = optind; file_num < mac; file_num++)
261 find_entries(mav[file_num]);
262
263 if (xflag) {
264 put_entries(head);
265 exit(infile_fail > 0 ? 2 : 0); /* Fix for 1082298 */
266 }
267 if (uflag) {
268 for (i = 1; i < mac; i++) {
269 (void) sprintf(cmd,
270 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
271 outfile, mav[i], outfile);
272 (void) system(cmd);
273 }
274 aflag++;
275 }
276 outf = fopen(outfile, aflag ? "a" : "w");
277 if (outf == NULL) {
278 perror(outfile);
279 exit(1);
280 }
281 put_entries(head);
282 (void) fclose(outf);
283 if (uflag) {
284 (void) sprintf(cmd, "sort %s -o %s", outfile, outfile);
285 (void) system(cmd);
286 }
287 return (infile_fail > 0 ? 2 : 0); /* Fix for #1082298 */
288 }
289
290 /*
291 * This routine sets up the boolean psuedo-functions which work
292 * by seting boolean flags dependent upon the corresponding character
293 * Every char which is NOT in that string is not a white char. Therefore,
294 * all of the array "_wht" is set to FALSE, and then the elements
295 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
296 * of a char is TRUE if it is the string "white", else FALSE.
297 */
298 static void
init()299 init()
300 {
301 char *sp;
302 int i;
303
304 for (i = 0; i < 0177; i++) {
305 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
306 _opr[i] = FALSE; /* initialize boolean */
307 /* array of operator symbols */
308 _gd[i] = TRUE;
309 }
310 for (sp = white; *sp; sp++)
311 _wht[*sp] = TRUE;
312 for (sp = endtk; *sp; sp++)
313 _etk[*sp] = TRUE;
314 for (sp = intk; *sp; sp++)
315 _itk[*sp] = TRUE;
316 for (sp = begtk; *sp; sp++)
317 _btk[*sp] = TRUE;
318
319 /* mark overloadable operator symbols */
320 for (sp = oprtk; *sp; sp++)
321 _opr[*sp] = TRUE;
322
323 for (sp = notgd; *sp; sp++)
324 _gd[*sp] = FALSE;
325 }
326
327 /*
328 * This routine opens the specified file and calls the function
329 * which finds the function and type definitions.
330 */
331 static void
find_entries(file)332 find_entries(file)
333 char *file;
334 {
335 char *cp;
336 struct stat st;
337
338 /* skip anything that isn't a regular file */
339 if (stat(file, &st) == 0 && !S_ISREG(st.st_mode))
340 return;
341
342 if ((inf = fopen(file, "r")) == NULL) {
343 perror(file);
344 infile_fail++; /* Count bad opens. ID #1082298 */
345 return;
346 }
347 curfile = savestr(file);
348 lineno = 0;
349 #ifdef __STDC__
350 cp = strrchr(file, '.');
351 #else
352 cp = rindex(file, '.');
353 #endif
354 /* .l implies lisp or lex source code */
355 if (cp && cp[1] == 'l' && cp[2] == '\0') {
356 #ifdef __STDC__
357 if (strchr(";([", first_char()) != NULL) /* lisp */
358 #else
359 if (index(";([", first_char()) != NULL) /* lisp */
360 #endif
361 {
362 L_funcs(inf);
363 (void) fclose(inf);
364 return;
365 } else { /* lex */
366 /*
367 * throw away all the code before the second "%%"
368 */
369 toss_yysec();
370 getaline(lineftell);
371 pfnote("yylex", lineno, TRUE);
372 toss_yysec();
373 C_entries();
374 (void) fclose(inf);
375 return;
376 }
377 }
378 /* .y implies a yacc file */
379 if (cp && cp[1] == 'y' && cp[2] == '\0') {
380 toss_yysec();
381 Y_entries();
382 C_entries();
383 (void) fclose(inf);
384 return;
385 }
386
387 /*
388 * Add in file name extension support for Sun C++ which
389 * permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.)
390 */
391
392 /* if not a .c, .C, .cc, .cxx or .h file, try fortran */
393 if (cp && (cp[1] != 'C' && cp[1] != 'c' && cp[1] != 'h') &&
394 cp[2] == '\0' && (strcmp(cp, ".cc") == 0) &&
395 (strcmp(cp, ".cxx") == 0)) {
396 if (PF_funcs(inf) != 0) {
397 (void) fclose(inf);
398 return;
399 }
400 rewind(inf); /* no fortran tags found, try C */
401 }
402 C_entries();
403 (void) fclose(inf);
404 }
405
406 static void
pfnote(name,ln,f)407 pfnote(name, ln, f)
408 char *name;
409 int ln;
410 bool f; /* f == TRUE when function */
411 {
412 char *fp;
413 NODE *np;
414 char *nametk; /* hold temporary tokens from name */
415 char nbuf[BUFSIZ];
416
417 if ((np = malloc(sizeof (NODE))) == NULL) {
418 (void) fprintf(stderr,
419 gettext("ctags: too many entries to sort\n"));
420 put_entries(head);
421 free_tree(head);
422 head = np = (NODE *) malloc(sizeof (NODE));
423 }
424 if (xflag == 0 && (strcmp(name, "main") == 0)) {
425 #ifdef __STDC__
426 fp = strrchr(curfile, '/');
427 #else
428 fp = rindex(curfile, '/');
429 #endif
430 if (fp == 0)
431 fp = curfile;
432 else
433 fp++;
434 (void) sprintf(nbuf, "M%s", fp);
435 #ifdef __STDC__
436 fp = strrchr(nbuf, '.');
437 #else
438 fp = rindex(nbuf, '.');
439 #endif
440 /* Chop off .cc and .cxx as well as .c, .h, etc */
441 if (fp && ((fp[2] == 0) || (fp[2] == 'c' && fp[3] == 0) ||
442 (fp[3] == 'x' && fp[4] == 0)))
443 *fp = 0;
444 name = nbuf;
445 }
446
447 /* remove in-between blanks operator function tags */
448 #ifdef __STDC__
449 if (strchr(name, ' ') != NULL)
450 #else
451 if (index(name, ' ') != NULL)
452 #endif
453 {
454 (void) strcpy(name, strtok(name, " "));
455 while (nametk = strtok(0, " "))
456 (void) strcat(name, nametk);
457 }
458 np->entry = savestr(name);
459 np->file = curfile;
460 np->f = f;
461 np->lno = ln;
462 np->left = np->right = 0;
463 if (xflag == 0) {
464 lbuf[50] = 0;
465 (void) strcat(lbuf, "$");
466 lbuf[50] = 0;
467 }
468 np->pat = savestr(lbuf);
469 if (head == NULL)
470 head = np;
471 else
472 add_node(np, head);
473 }
474
475 /*
476 * This routine finds functions and typedefs in C syntax and adds them
477 * to the list.
478 */
479 static void
C_entries()480 C_entries()
481 {
482 int c;
483 char *token, *tp;
484 bool incomm, inquote, inchar, midtoken, isoperator, optfound;
485 int level;
486 char *sp;
487 char tok[BUFSIZ];
488 long int tokftell;
489
490 number = gotone = midtoken = inquote = inchar =
491 incomm = isoperator = optfound = FALSE;
492
493 level = 0;
494 sp = tp = token = line;
495 lineno++;
496 lineftell = tokftell = ftell(inf);
497 for (;;) {
498 *sp = c = getc(inf);
499 if (feof(inf))
500 break;
501 if (c == '\n') {
502 lineftell = ftell(inf);
503 lineno++;
504 } else if (c == '\\') {
505 c = *++sp = getc(inf);
506 if ((c == '\n') || (c == EOF)) { /* c == EOF, 1091005 */
507 lineftell = ftell(inf);
508 lineno++;
509 c = ' ';
510 }
511 } else if (incomm) {
512 if (c == '*') {
513 while ((*++sp = c = getc(inf)) == '*')
514 continue;
515
516 /* c == EOF 1091005 */
517 if ((c == '\n') || (c == EOF)) {
518 lineftell = ftell(inf);
519 lineno++;
520 }
521
522 if (c == '/')
523 incomm = FALSE;
524 }
525 } else if (inquote) {
526 /*
527 * Too dumb to know about \" not being magic, but
528 * they usually occur in pairs anyway.
529 */
530 if (c == '"')
531 inquote = FALSE;
532 continue;
533 } else if (inchar) {
534 if (c == '\'')
535 inchar = FALSE;
536 continue;
537 } else if (midtoken == TRUE) { /* if white space omitted */
538 goto dotoken;
539 } else switch (c) {
540 case '"':
541 inquote = TRUE;
542 continue;
543 case '\'':
544 inchar = TRUE;
545 continue;
546 case '/':
547 *++sp = c = getc(inf);
548 /* Handles the C++ comment token "//" */
549 if (c == '*')
550 incomm = TRUE;
551 else if (c == '/') {
552 /*
553 * Skip over all the characters after
554 * "//" until a newline character. Now also
555 * includes fix for 1091005, check for EOF.
556 */
557 do {
558 c = getc(inf);
559 /* 1091005: */
560 } while ((c != '\n') && (c != EOF));
561
562
563 /*
564 * Fixed bugid 1030014
565 * Return the current position of the
566 * file after the newline.
567 */
568 lineftell = ftell(inf);
569 lineno++;
570 *--sp = c;
571 }
572 else
573 (void) ungetc(*sp, inf);
574 continue;
575 case '#':
576 if (sp == line)
577 number = TRUE;
578 continue;
579 case '{':
580 if ((tydef == begin_rec) || (tydef == begin_tag)) {
581 tydef = middle;
582 }
583 level++;
584 continue;
585 case '}':
586 /*
587 * Heuristic for function or structure end;
588 * common for #ifdef/#else blocks to add extra "{"
589 */
590 if (sp == line)
591 level = 0; /* reset */
592 else
593 level--;
594 if (!level && tydef == middle) {
595 tydef = end;
596 }
597 if (!level && tydef == none) /* Fix for #1034126 */
598 goto dotoken;
599 continue;
600 }
601
602 dotoken:
603
604
605 if (!level && !inquote && !incomm && gotone == FALSE) {
606 if (midtoken) {
607 if (endtoken(c)) {
608
609 /*
610 *
611 * ':' +---> ':' -> midtok
612 *
613 * +---> operator{+,-, etc} -> midtok
614 * (continue)
615 * +---> endtok
616 */
617 /*
618 * Enhance operator function support and
619 * fix bugid 1027806
620 *
621 * For operator token, scanning will continue until
622 * '(' is found. Spaces between 'operater' and
623 * 'oprtk' are allowed (e.g. 'operator + ()'), but
624 * will be removed when the actual entry for the tag
625 * is made.
626 * Note that functions of the form 'operator ()(int)'
627 * will be recognized, but 'operator ()' will not,
628 * even though this is legitimate in C.
629 */
630
631 if (optoken(c)) {
632 if (isoperator) {
633 if (optfound) {
634 if (c != '(') {
635 tp++;
636 goto next_char;
637 }
638 } else {
639 if (c != ' ') {
640 optfound = TRUE;
641 }
642 tp++;
643 goto next_char;
644 }
645 } else {
646 /* start: this code shifted left for cstyle */
647 char *backptr = tp - 7;
648 if (strncmp(backptr, "operator", 8) == 0) {
649 /* This is an overloaded operator */
650 isoperator = TRUE;
651 if (c != ' ') {
652 optfound = TRUE;
653 }
654
655 tp++;
656 goto next_char;
657 } else if (c == '~') {
658 /* This is a destructor */
659 tp++;
660 goto next_char;
661 }
662 /* end: above code shifted left for cstyle */
663 }
664 } else if (c == ':') {
665 if ((*++sp = getc(inf)) == ':') {
666 tp += 2;
667 c = *sp;
668 goto next_char;
669 } else {
670 (void) ungetc (*sp, inf);
671 --sp;
672 }
673 }
674
675 /* start: this code shifted left for cstyle */
676 {
677 int f;
678 int pfline = lineno;
679
680 if (start_entry(&sp, token, &f)) {
681 (void) strncpy(tok, token, tp-token+1);
682 tok[tp-token+1] = 0;
683 getaline(tokftell);
684 pfnote(tok, pfline, f);
685 gotone = f; /* function */
686 }
687
688 isoperator = optfound = midtoken = FALSE;
689 token = sp;
690 }
691 /* end: above code shifted left for cstyle */
692 } else if (intoken(c))
693 tp++;
694 } else if (begtoken(c)) {
695 token = tp = sp;
696 midtoken = TRUE;
697 tokftell = lineftell;
698 }
699 }
700 next_char:
701 if (c == ';' && tydef == end) /* clean with typedefs */
702 tydef = none;
703 sp++;
704 /* The "c == }" was added to fix #1034126 */
705 if (c == '\n' ||c == '}'|| sp > &line[sizeof (line) - BUFSIZ]) {
706 tp = token = sp = line;
707 number = gotone = midtoken = inquote =
708 inchar = isoperator = optfound = FALSE;
709 }
710 }
711 }
712
713 /*
714 * This routine checks to see if the current token is
715 * at the start of a function, or corresponds to a typedef
716 * It updates the input line * so that the '(' will be
717 * in it when it returns.
718 */
719 static int
start_entry(lp,token,f)720 start_entry(lp, token, f)
721 char **lp, *token;
722 int *f;
723 {
724 char *sp;
725 int c;
726 static bool found;
727 bool firsttok; /* T if have seen first token in ()'s */
728 int bad;
729
730 *f = 1; /* a function */
731 sp = *lp;
732 c = *sp;
733 bad = FALSE;
734 if (!number) { /* space is not allowed in macro defs */
735 while (iswhite(c)) {
736 *++sp = c = getc(inf);
737 if ((c == '\n') || (c == EOF)) { /* c==EOF, #1091005 */
738 lineno++;
739 lineftell = ftell(inf);
740 if (sp > &line[sizeof (line) - BUFSIZ])
741 goto ret;
742 }
743 }
744 /* the following tries to make it so that a #define a b(c) */
745 /* doesn't count as a define of b. */
746 } else {
747 if (strncmp(token, "define", 6) == 0)
748 found = 0;
749 else
750 found++;
751 if (found >= 2) {
752 gotone = TRUE;
753 badone: bad = TRUE;
754 goto ret;
755 }
756 }
757 /* check for the typedef cases */
758 #ifdef XPG4
759 if (strncmp(token, "typedef", 7) == 0) {
760 #else /* !XPG4 */
761 if (tflag && (strncmp(token, "typedef", 7) == 0)) {
762 #endif /* XPG4 */
763 tydef = begin;
764 goto badone;
765 }
766 /* Handles 'class' besides 'struct' etc. */
767 if (tydef == begin && ((strncmp(token, "struct", 6) == 0) ||
768 (strncmp(token, "class", 5) == 0) ||
769 (strncmp(token, "union", 5) == 0)||
770 (strncmp(token, "enum", 4) == 0))) {
771 tydef = begin_rec;
772 goto badone;
773 }
774 if (tydef == begin) {
775 tydef = end;
776 goto badone;
777 }
778 if (tydef == begin_rec) {
779 tydef = begin_tag;
780 goto badone;
781 }
782 if (tydef == begin_tag) {
783 tydef = end;
784 goto gottydef; /* Fall through to "tydef==end" */
785 }
786
787 gottydef:
788 if (tydef == end) {
789 *f = 0;
790 goto ret;
791 }
792 if (c != '(')
793 goto badone;
794 firsttok = FALSE;
795 while ((*++sp = c = getc(inf)) != ')') {
796 if ((c == '\n') || (c == EOF)) { /* c == EOF Fix for #1091005 */
797 lineftell = ftell(inf);
798 lineno++;
799 if (sp > &line[sizeof (line) - BUFSIZ])
800 goto ret;
801 }
802 /*
803 * This line used to confuse ctags:
804 * int (*oldhup)();
805 * This fixes it. A nonwhite char before the first
806 * token, other than a / (in case of a comment in there)
807 * makes this not a declaration.
808 */
809 if (begtoken(c) || c == '/')
810 firsttok = TRUE;
811 else if (!iswhite(c) && !firsttok)
812 goto badone;
813 }
814 while (iswhite(*++sp = c = getc(inf)))
815 if ((c == '\n') || (c == EOF)) { /* c == EOF fix for #1091005 */
816 lineno++;
817 lineftell = ftell(inf);
818 if (sp > &line[sizeof (line) - BUFSIZ])
819 break;
820 }
821 ret:
822 *lp = --sp;
823 if (c == '\n')
824 lineno--;
825 (void) ungetc(c, inf);
826 return (!bad && (!*f || isgood(c)));
827 /* hack for typedefs */
828 }
829
830 /*
831 * Y_entries:
832 * Find the yacc tags and put them in.
833 */
834 static void
Y_entries()835 Y_entries()
836 {
837 char *sp, *orig_sp;
838 int brace;
839 bool in_rule, toklen;
840 char tok[BUFSIZ];
841
842 brace = 0;
843 getaline(lineftell);
844 pfnote("yyparse", lineno, TRUE);
845 while (fgets(line, sizeof (line), inf) != NULL)
846 for (sp = line; *sp; sp++)
847 switch (*sp) {
848 case '\n':
849 lineno++;
850 /* FALLTHROUGH */
851 case ' ':
852 case '\t':
853 case '\f':
854 case '\r':
855 break;
856 case '"':
857 do {
858 while (*++sp != '"')
859 continue;
860 } while (sp[-1] == '\\');
861 break;
862 case '\'':
863 do {
864 while (*++sp != '\'')
865 continue;
866 } while (sp[-1] == '\\');
867 break;
868 case '/':
869 if (*++sp == '*')
870 sp = toss_comment(sp);
871 else
872 --sp;
873 break;
874 case '{':
875 brace++;
876 break;
877 case '}':
878 brace--;
879 break;
880 case '%':
881 if (sp[1] == '%' && sp == line)
882 return;
883 break;
884 case '|':
885 case ';':
886 in_rule = FALSE;
887 break;
888 default:
889 if (brace == 0 && !in_rule && (isalpha(*sp) ||
890 *sp == '.' ||
891 *sp == '_')) {
892 orig_sp = sp;
893 ++sp;
894 while (isalnum(*sp) || *sp == '_' ||
895 *sp == '.')
896 sp++;
897 toklen = sp - orig_sp;
898 while (isspace(*sp))
899 sp++;
900 if (*sp == ':' || (*sp == '\0' &&
901 first_char() == ':')) {
902 (void) strncpy(tok,
903 orig_sp, toklen);
904 tok[toklen] = '\0';
905 (void) strcpy(lbuf, line);
906 lbuf[strlen(lbuf) - 1] = '\0';
907 pfnote(tok, lineno, TRUE);
908 in_rule = TRUE;
909 }
910 else
911 sp--;
912 }
913 break;
914 }
915 }
916
917 static char *
toss_comment(start)918 toss_comment(start)
919 char *start;
920 {
921 char *sp;
922
923 /*
924 * first, see if the end-of-comment is on the same line
925 */
926 do {
927 #ifdef __STDC__
928 while ((sp = strchr(start, '*')) != NULL)
929 #else
930 while ((sp = index(start, '*')) != NULL)
931 #endif
932 if (sp[1] == '/')
933 return (++sp);
934 else
935 start = (++sp);
936 start = line;
937 lineno++;
938 } while (fgets(line, sizeof (line), inf) != NULL);
939
940 /*
941 * running this through lint revealed that the original version
942 * of this routine didn't explicitly return something; while
943 * the return value was always used!. so i've added this
944 * next line.
945 */
946 return (sp);
947 }
948
949 static void
getaline(where)950 getaline(where)
951 long int where;
952 {
953 long saveftell = ftell(inf);
954 char *cp;
955
956 (void) fseek(inf, where, 0);
957 (void) fgets(lbuf, sizeof (lbuf), inf);
958 #ifdef __STDC__
959 cp = strrchr(lbuf, '\n');
960 #else
961 cp = rindex(lbuf, '\n');
962 #endif
963 if (cp)
964 *cp = 0;
965 (void) fseek(inf, saveftell, 0);
966 }
967
968 static void
free_tree(node)969 free_tree(node)
970 NODE *node;
971 {
972 while (node) {
973 free_tree(node->right);
974 free(node);
975 node = node->left;
976 }
977 }
978
979 static void
add_node(node,cur_node)980 add_node(node, cur_node)
981 NODE *node, *cur_node;
982 {
983 int dif;
984
985 dif = strcmp(node->entry, cur_node->entry);
986 if (dif == 0) {
987 if (node->file == cur_node->file) {
988 if (!wflag) {
989 (void) fprintf(stderr,
990 gettext("Duplicate entry in file %s, line %d: %s\n"),
991 node->file, lineno, node->entry);
992 (void) fprintf(stderr,
993 gettext("Second entry ignored\n"));
994 }
995 return;
996 }
997 if (!cur_node->been_warned)
998 if (!wflag) {
999 (void) fprintf(stderr, gettext("Duplicate "
1000 "entry in files %s and %s: %s "
1001 "(Warning only)\n"),
1002 node->file, cur_node->file,
1003 node->entry);
1004 }
1005 cur_node->been_warned = TRUE;
1006 return;
1007 }
1008
1009 if (dif < 0) {
1010 if (cur_node->left != NULL)
1011 add_node(node, cur_node->left);
1012 else
1013 cur_node->left = node;
1014 return;
1015 }
1016 if (cur_node->right != NULL)
1017 add_node(node, cur_node->right);
1018 else
1019 cur_node->right = node;
1020 }
1021
1022 static void
put_entries(node)1023 put_entries(node)
1024 NODE *node;
1025 {
1026 char *sp;
1027
1028 if (node == NULL)
1029 return;
1030 put_entries(node->left);
1031
1032 /*
1033 * while the code in the following #ifdef section could be combined,
1034 * it's explicitly separated here to make maintainance easier.
1035 */
1036 #ifdef XPG4
1037 /*
1038 * POSIX 2003: we no longer have a "-t" flag; the logic is
1039 * automatically assumed to be "turned on" here.
1040 */
1041 if (xflag == 0) {
1042 (void) fprintf(outf, "%s\t%s\t%c^",
1043 node->entry, node->file, searchar);
1044 for (sp = node->pat; *sp; sp++)
1045 if (*sp == '\\')
1046 (void) fprintf(outf, "\\\\");
1047 else if (*sp == searchar)
1048 (void) fprintf(outf, "\\%c", searchar);
1049 else
1050 (void) putc(*sp, outf);
1051 (void) fprintf(outf, "%c\n", searchar);
1052 } else if (vflag)
1053 (void) fprintf(stdout, "%s %s %d\n",
1054 node->entry, node->file, (node->lno+63)/64);
1055 else
1056 (void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1057 node->entry, node->lno, node->file, node->pat);
1058 #else /* XPG4 */
1059 /*
1060 * original way of doing things. "-t" logic is only turned on
1061 * when the user has specified it via a command-line argument.
1062 */
1063 if (xflag == 0)
1064 if (node->f) { /* a function */
1065 (void) fprintf(outf, "%s\t%s\t%c^",
1066 node->entry, node->file, searchar);
1067 for (sp = node->pat; *sp; sp++)
1068 if (*sp == '\\')
1069 (void) fprintf(outf, "\\\\");
1070 else if (*sp == searchar)
1071 (void) fprintf(outf, "\\%c", searchar);
1072 else
1073 (void) putc(*sp, outf);
1074 (void) fprintf(outf, "%c\n", searchar);
1075 } else { /* a typedef; text pattern inadequate */
1076 (void) fprintf(outf, "%s\t%s\t%d\n",
1077 node->entry, node->file, node->lno);
1078 } else if (vflag)
1079 (void) fprintf(stdout, "%s %s %d\n",
1080 node->entry, node->file, (node->lno+63)/64);
1081 else
1082 (void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1083 node->entry, node->lno, node->file, node->pat);
1084 #endif /* XPG4 */
1085 put_entries(node->right);
1086 }
1087
1088
1089 static int
PF_funcs(fi)1090 PF_funcs(fi)
1091 FILE *fi;
1092 {
1093
1094 pfcnt = 0;
1095 while (fgets(lbuf, sizeof (lbuf), fi)) {
1096 lineno++;
1097 dbp = lbuf;
1098 if (*dbp == '%') dbp++; /* Ratfor escape to fortran */
1099 while (isspace(*dbp))
1100 dbp++;
1101 if (*dbp == 0)
1102 continue;
1103 switch (*dbp |' ') {
1104
1105 case 'i':
1106 if (tail("integer"))
1107 takeprec();
1108 break;
1109 case 'r':
1110 if (tail("real"))
1111 takeprec();
1112 break;
1113 case 'l':
1114 if (tail("logical"))
1115 takeprec();
1116 break;
1117 case 'c':
1118 if (tail("complex") || tail("character"))
1119 takeprec();
1120 break;
1121 case 'd':
1122 if (tail("double")) {
1123 while (isspace(*dbp))
1124 dbp++;
1125 if (*dbp == 0)
1126 continue;
1127 if (tail("precision"))
1128 break;
1129 continue;
1130 }
1131 break;
1132 }
1133 while (isspace(*dbp))
1134 dbp++;
1135 if (*dbp == 0)
1136 continue;
1137 switch (*dbp|' ') {
1138
1139 case 'f':
1140 if (tail("function"))
1141 getit();
1142 continue;
1143 case 's':
1144 if (tail("subroutine"))
1145 getit();
1146 continue;
1147 case 'p':
1148 if (tail("program")) {
1149 getit();
1150 continue;
1151 }
1152 if (tail("procedure"))
1153 getit();
1154 continue;
1155 }
1156 }
1157 return (pfcnt);
1158 }
1159
1160 static int
tail(cp)1161 tail(cp)
1162 char *cp;
1163 {
1164 int len = 0;
1165
1166 while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
1167 cp++, len++;
1168 if (*cp == 0) {
1169 dbp += len;
1170 return (1);
1171 }
1172 return (0);
1173 }
1174
1175 static void
takeprec()1176 takeprec()
1177 {
1178
1179 while (isspace(*dbp))
1180 dbp++;
1181 if (*dbp != '*')
1182 return;
1183 dbp++;
1184 while (isspace(*dbp))
1185 dbp++;
1186 if (!isdigit(*dbp)) {
1187 --dbp; /* force failure */
1188 return;
1189 }
1190 do
1191 dbp++;
1192 while (isdigit(*dbp));
1193 }
1194
1195 static void
getit()1196 getit()
1197 {
1198 char *cp;
1199 char c;
1200 char nambuf[BUFSIZ];
1201
1202 for (cp = lbuf; *cp; cp++)
1203 ;
1204 *--cp = 0; /* zap newline */
1205 while (isspace(*dbp))
1206 dbp++;
1207 if (*dbp == 0 || !isalpha(*dbp) || !isascii(*dbp))
1208 return;
1209 for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
1210 continue;
1211 c = cp[0];
1212 cp[0] = 0;
1213 (void) strcpy(nambuf, dbp);
1214 cp[0] = c;
1215 pfnote(nambuf, lineno, TRUE);
1216 pfcnt++;
1217 }
1218
/*
 * Return a freshly allocated copy of the string "cp".  The caller owns
 * the copy.  If no memory is available, an error is reported and the
 * program exits with status 1, so the result is never NULL.
 */
static char *
savestr(char *cp)
{
	size_t	len;
	char	*dp;

	len = strlen(cp) + 1;		/* include the terminating NUL */
	dp = malloc(len);
	if (dp == NULL) {
		perror("ctags");
		exit(1);
	}
	(void) memcpy(dp, cp, len);

	return (dp);
}
1232
1233 #ifndef __STDC__
/*
 * Return a pointer to the last occurrence of the character c in the
 * string sp, or NULL if c does not occur.  The terminating NUL counts as
 * part of the string, so looking for '\0' yields a pointer to it.
 *
 * Identical to v7 rindex, included for portability.
 */

static char *
rindex(sp, c)
	char	*sp, c;
{
	char	*last = NULL;

	for (;;) {
		if (*sp == c)
			last = sp;
		if (*sp == '\0')
			break;
		sp++;
	}
	return (last);
}
1254 #endif
1255
1256 /*
1257 * lisp tag functions
1258 * just look for (def or (DEF
1259 */
1260
1261 static void
L_funcs(fi)1262 L_funcs(fi)
1263 FILE *fi;
1264 {
1265 int special;
1266
1267 pfcnt = 0;
1268 while (fgets(lbuf, sizeof (lbuf), fi)) {
1269 lineno++;
1270 dbp = lbuf;
1271 if (dbp[0] == '(' &&
1272 (dbp[1] == 'D' || dbp[1] == 'd') &&
1273 (dbp[2] == 'E' || dbp[2] == 'e') &&
1274 (dbp[3] == 'F' || dbp[3] == 'f')) {
1275 dbp += 4;
1276 if (striccmp(dbp, "method") == 0 ||
1277 striccmp(dbp, "wrapper") == 0 ||
1278 striccmp(dbp, "whopper") == 0)
1279 special = TRUE;
1280 else
1281 special = FALSE;
1282 while (!isspace(*dbp))
1283 dbp++;
1284 while (isspace(*dbp))
1285 dbp++;
1286 L_getit(special);
1287 }
1288 }
1289 }
1290
1291 static void
L_getit(special)1292 L_getit(special)
1293 int special;
1294 {
1295 char *cp;
1296 char c;
1297 char nambuf[BUFSIZ];
1298
1299 for (cp = lbuf; *cp; cp++)
1300 continue;
1301 *--cp = 0; /* zap newline */
1302 if (*dbp == 0)
1303 return;
1304 if (special) {
1305 #ifdef __STDC__
1306 if ((cp = strchr(dbp, ')')) == NULL)
1307 #else
1308 if ((cp = index(dbp, ')')) == NULL)
1309 #endif
1310 return;
1311 while (cp >= dbp && *cp != ':')
1312 cp--;
1313 if (cp < dbp)
1314 return;
1315 dbp = cp;
1316 while (*cp && *cp != ')' && *cp != ' ')
1317 cp++;
1318 }
1319 else
1320 for (cp = dbp + 1; *cp && *cp != '(' && *cp != ' '; cp++)
1321 continue;
1322 c = cp[0];
1323 cp[0] = 0;
1324 (void) strcpy(nambuf, dbp);
1325 cp[0] = c;
1326 pfnote(nambuf, lineno, TRUE);
1327 pfcnt++;
1328 }
1329
/*
 * striccmp:
 *	Compare two strings over the length of the second, ignoring
 *	case distinctions in the first.  If they are the same, return 0.
 *	If they are different, return the difference of the first two
 *	different characters.  It is assumed that the pattern (second
 *	string) is completely lower case.
 */
static int
striccmp(char *str, char *pat)
{
	int	c1;

	for (; *pat != '\0'; pat++, str++) {
		/* the ctype functions need unsigned char values */
		if (isupper((unsigned char)*str))
			c1 = tolower((unsigned char)*str);
		else
			c1 = *str;
		if (c1 != *pat)
			return (c1 - *pat);
	}
	return (0);
}
1356
1357 /*
1358 * first_char:
1359 * Return the first non-blank character in the file. After
1360 * finding it, rewind the input file so we start at the beginning
1361 * again.
1362 */
1363 static int
first_char()1364 first_char()
1365 {
1366 int c;
1367 long off;
1368
1369 off = ftell(inf);
1370 while ((c = getc(inf)) != EOF)
1371 if (!isspace(c) && c != '\r') {
1372 (void) fseek(inf, off, 0);
1373 return (c);
1374 }
1375 (void) fseek(inf, off, 0);
1376 return (EOF);
1377 }
1378
1379 /*
1380 * toss_yysec:
1381 * Toss away code until the next "%%" line.
1382 */
1383 static void
toss_yysec()1384 toss_yysec()
1385 {
1386 char buf[BUFSIZ];
1387
1388 for (;;) {
1389 lineftell = ftell(inf);
1390 if (fgets(buf, BUFSIZ, inf) == NULL)
1391 return;
1392 lineno++;
1393 if (strncmp(buf, "%%", 2) == 0)
1394 return;
1395 }
1396 }
1397
/*
 * Usage:
 *	Print the two-line usage summary on stderr and exit with
 *	status 1.  When XPG4 is defined the flag list omits "t".
 *	Each message is passed to gettext() as one complete string.
 */
static void
Usage()
{
#ifdef XPG4
	(void) fprintf(stderr,
	    gettext("Usage:\tctags [-aBFuvw] [-f tagsfile] file ...\n"));
#else	/* !XPG4 */
	(void) fprintf(stderr,
	    gettext("Usage:\tctags [-aBFtuvw] [-f tagsfile] file ...\n"));
#endif	/* XPG4 */
	(void) fprintf(stderr, gettext("OR:\tctags [-x] file ...\n"));
	exit(1);
}
1410
1411
1412 /*
1413 * parseargs(): modify the args
1414 * the purpose of this routine is to transform any ancient argument
1415 * usage into a format which is acceptable to getopt(3C), so that we
1416 * retain backwards Solaris 2.[0-4] compatibility.
1417 *
1418 * This routine allows us to make full use of getopts, without any
1419 * funny argument processing in main().
1420 *
1421 * The other alternative would be to hand-craft the processed arguments
1422 * during and after getopt(3C) - which usually leads to uglier code
1423 * in main(). I've opted to keep the ugliness isolated down here,
1424 * instead of in main().
1425 *
1426 * In a nutshell, if the user has used the old Solaris syntax of:
1427 * ctags [-aBFtuvwx] [-f tagsfile] filename ...
1428 * We simply change this into:
1429 * ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file...
1430 *
1431 * If the user has specified the new getopt(3C) syntax, we merely
1432 * copy that into our modified argument space.
1433 */
1434 static void
parseargs(ac,av)1435 parseargs(ac, av)
1436 int ac; /* argument count */
1437 char **av; /* ptr to original argument space */
1438 {
1439 int i; /* current argument */
1440 int a; /* used to parse combined arguments */
1441 int fflag; /* 1 = we're only parsing filenames */
1442 size_t sz; /* size of the argument */
1443 size_t mav_sz; /* size of our psuedo argument space */
1444
1445 i = mac = fflag = 0; /* proper initializations */
1446
1447 mav_sz = ((ac + 1) * sizeof (char *));
1448 if ((mav = malloc(mav_sz)) == (char **)NULL) {
1449 perror("Can't malloc argument space");
1450 exit(1);
1451 }
1452
1453 /* for each argument, see if we need to change things: */
1454 for (; (av[i] != (char *)NULL) && (av[i][0] != (char)NULL); i++) {
1455
1456 if (strcmp(av[i], "--") == 0) {
1457 fflag = 1; /* just handle filenames now */
1458 }
1459
1460 sz = strlen(&av[i][0]); /* get this arg's size */
1461
1462 /*
1463 * if the argument starts with a "-", and has more than
1464 * 1 flag, then we have to search through each character,
1465 * and separate any flags which have been combined.
1466 *
1467 * so, if we've found a "-" string which needs separating:
1468 */
1469 if (fflag == 0 && /* not handling filename args */
1470 av[i][0] == '-' && /* and this is a flag */
1471 sz > 2) { /* and there's more than 1 flag */
1472 /* then for each flag after the "-" sign: */
1473 for (a = 1; av[i][a]; a++) {
1474 /* copy the flag into mav space. */
1475 if (a > 1) {
1476 /*
1477 * we need to call realloc() after the
1478 * 1st combined flag, because "ac"
1479 * doesn't include combined args.
1480 */
1481 mav_sz += sizeof (char *);
1482 if ((mav = realloc(mav, mav_sz)) ==
1483 (char **)NULL) {
1484 perror("Can't realloc "
1485 "argument space");
1486 exit(1);
1487 }
1488 }
1489
1490 if ((mav[mac] = malloc((size_t)CPFLAG)) ==
1491 (char *)NULL) {
1492 perror("Can't malloc argument space");
1493 exit(1);
1494 }
1495 (void) sprintf(mav[mac], "-%c", av[i][a]);
1496 ++mac;
1497 }
1498 } else {
1499 /* otherwise, just copy the argument: */
1500 if ((mav[mac] = malloc(sz + 1)) == (char *)NULL) {
1501 perror("Can't malloc argument space");
1502 exit(1);
1503 }
1504 (void) strcpy(mav[mac], av[i]);
1505 ++mac;
1506 }
1507 }
1508
1509 mav[mac] = (char *)NULL;
1510 }
1511