1 /*
2 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /* Copyright (c) 1988 AT&T */
6 /* All Rights Reserved */
7
8 /*
9 * Copyright (c) 1980 Regents of the University of California.
10 * All rights reserved. The Berkeley software License Agreement
11 * specifies the terms and conditions for redistribution.
12 */
13
14 /*
15 * Modify ctags to handle C++ in C_entries(), etc:
16 * - Handles C++ comment token "//"
17 * - Handles C++ scope operator "::".
18 * This helps to distinguish between xyz()
19 * definition and X::xyz() definition.
20 * - Recognizes C++ reserved word "class" in typedef processing
21 * (for "-t" option)
22 * - Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx.
23 * - Handles overloaded unary/binary operator names
24 * Doesn't handle yet:
25 * - inline functions in class definition (currently they get
26 * swallowed within a class definition)
27 * - Tags with scope operator :: with spaces in between,
28 * e.g. classz ::afunc
29 *
30 * Enhance operator functions support:
31 * - Control flow involving operator tokens scanning are
32 * consistent with that of other function tokens - original
33 * hacking method for 2.0 is removed. This will accurately
34 * identify tags for declarations of the form 'operator+()'
35 * (bugid 1027806) as well as allowing spaces in between
36 * 'operator' and 'oprtk', e.g. 'operator + ()'.
37 *
38 */
39
40 #ifndef lint
41 char copyright[] = "@(#) Copyright (c) 1980 Regents of the University of "
42 "California.\nAll rights reserved.\n";
43 #endif
44
45 #include <stdio.h>
46 #include <ctype.h>
47 #include <locale.h>
48 #include <unistd.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <strings.h>
52 #include <limits.h>
53 #include <sys/types.h>
54 #include <sys/stat.h>
55
56 /*
57 * ctags: create a tags file
58 */
59
/*
 * This file predates <stdbool.h>: "bool" is a plain char that holds
 * TRUE or FALSE.
 */
#define	bool	char

#define	TRUE	(1)
#define	FALSE	(0)

#define	CPFLAG	3				/* # of bytes in a flag */

/*
 * Character-class predicates, implemented as table lookups (the tables
 * are filled in by init()).
 *
 * The argument is evaluated exactly once, because callers pass
 * expressions with side effects such as "*++sp = c = getc(inf)".  It is
 * narrowed to unsigned char before indexing so that bytes >= 0177 and
 * EOF select a valid slot; those slots are never set by init() and, the
 * tables being static, read as FALSE.
 */
#define	iswhite(arg)	(_wht[(unsigned char)(arg)])	/* T if char is white */
#define	begtoken(arg)	(_btk[(unsigned char)(arg)])	/* T if char can start token */
#define	intoken(arg)	(_itk[(unsigned char)(arg)])	/* T if char can be in token */
#define	endtoken(arg)	(_etk[(unsigned char)(arg)])	/* T if char ends tokens */
#define	isgood(arg)	(_gd[(unsigned char)(arg)])	/* T if char can be after ')' */

#define	optoken(arg)	(_opr[(unsigned char)(arg)])	/* T if char can be */
							/* an overloaded operator token */

#define	max(I1, I2)	((I1) > (I2) ? (I1) : (I2))

struct nd_st {				/* sorting structure */
	char	*entry;			/* function or type name */
	char	*file;			/* file name */
	bool	f;			/* use pattern or line no */
	int	lno;			/* for -x option */
	char	*pat;			/* search pattern */
	bool	been_warned;		/* set if noticed dup */
	struct nd_st *left, *right;	/* left and right sons */
};

/* ftell() is declared by <stdio.h>; no local K&R declaration is needed. */
typedef struct nd_st NODE;

static bool
	number,				/* T if on line starting with # */
	gotone,				/* found a func already on line */
					/* boolean "func" (see init) */
	_wht[UCHAR_MAX + 1], _etk[UCHAR_MAX + 1], _itk[UCHAR_MAX + 1],
	_btk[UCHAR_MAX + 1], _gd[UCHAR_MAX + 1];

/* boolean array for overloadable operator symbols */
static bool _opr[UCHAR_MAX + 1];
99
100 /*
101 * typedefs are recognized using a simple finite automata,
102 * tydef is its state variable.
103 */
104 typedef enum {none, begin, begin_rec, begin_tag, middle, end } TYST;
105
106 static TYST tydef = none;
107
108 static char searchar = '/'; /* use /.../ searches */
109
110 #define LINEBUFSIZ 4*BUFSIZ
111
112 static int lineno; /* line number of current line */
113 static char
114 line[LINEBUFSIZ], /* current input line */
115 *curfile, /* current input file name */
116 *outfile = "tags", /* output file */
117 *white = " \f\t\n", /* white chars */
118 *endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
119 /* token ending chars */
120 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
121 /* token starting chars */
122 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
123 "0123456789",
124 /* valid in-token chars */
125 *notgd = ",;"; /* non-valid after-function chars */
126
127 static char *oprtk = " =-+%*/&|^~!<>[]()"; /* overloadable operators */
128
129 static int file_num; /* current file number */
130 static int aflag; /* -a: append to tags */
131
132 #ifndef XPG4 /* XPG4: handle typedefs by default */
133 static int tflag; /* -t: create tags for typedefs */
134 #endif /* !XPG4 */
135
136 static int uflag; /* -u: update tags */
137 static int wflag; /* -w: suppress warnings */
138 static int vflag; /* -v: create vgrind style index output */
139 static int xflag; /* -x: create cxref style output */
140
141 static char lbuf[LINEBUFSIZ];
142
143 static FILE
144 *inf, /* ioptr for current input file */
145 *outf; /* ioptr for tags file */
146
147 static long lineftell; /* ftell after getc( inf ) == '\n' */
148
149 static NODE *head; /* the head of the sorted binary tree */
150
151 static int infile_fail; /* Count of bad opens. Fix bug ID #1082298 */
152
153 static char *dbp = lbuf;
154 static int pfcnt;
155
156 static int mac; /* our modified argc, after parseargs() */
157 static char **mav; /* our modified argv, after parseargs() */
158
159
160 /* our local functions: */
161 static void init(void);
162 static void find_entries(char *);
163 static void pfnote(char *, int, bool);
164 static void C_entries(void);
165 static int start_entry(char **, char *, int *);
166 static void Y_entries(void);
167 static char *toss_comment(char *);
168 static void getaline(long int);
169 static void free_tree(NODE *);
170 static void add_node(NODE *, NODE *);
171 static void put_entries(NODE *);
172 static int PF_funcs(FILE *);
173 static int tail(char *);
174 static void takeprec(void);
175 static void getit(void);
176 static char *savestr(char *);
177 static void L_funcs(FILE *);
178 static void L_getit(int);
179 static int striccmp(char *, char *);
180 static int first_char(void);
181 static void toss_yysec(void);
182 static void Usage(void);
183 static void parseargs(int, char **);
184
185 int
main(int ac,char * av[])186 main(int ac, char *av[])
187 {
188 int i;
189 char cmd[100];
190
191 (void) setlocale(LC_ALL, "");
192 #if !defined(TEXT_DOMAIN)
193 #define TEXT_DOMAIN "SYS_TEST"
194 #endif
195 (void) textdomain(TEXT_DOMAIN);
196
197 parseargs(ac, av);
198
199 while ((i = getopt(mac, mav, "aBFtuvwxf:")) != EOF) {
200 switch (i) {
201 case 'a': /* -a: Append output to existing tags file */
202 aflag++;
203 break;
204
205 case 'B': /* -B: Use backward search patterns (?...?) */
206 searchar = '?';
207 break;
208
209 case 'F': /* -F: Use forward search patterns (/.../) */
210 searchar = '/';
211 break;
212
213 case 't': /* -t: Create tags for typedefs. */
214 /* for XPG4 , we silently ignore "-t". */
215 #ifndef XPG4
216 tflag++;
217 #endif /* !XPG4 */
218 break;
219
220 case 'u': /* -u: Update the specified tags file */
221 uflag++;
222 break;
223
224 case 'v': /* -v: Index listing on stdout */
225 vflag++;
226 xflag++;
227 break;
228
229 case 'w': /* -w: Suppress warnings */
230 wflag++;
231 break;
232
233 case 'x': /* -x: Produce a simple index */
234 xflag++;
235 break;
236
237 case 'f': /* -f tagsfile: output to tagsfile */
238 outfile = strdup(optarg);
239 break;
240
241 default:
242 Usage(); /* never returns */
243 break;
244 }
245 }
246
247 /* if we didn't specify any source code to parse, complain and die. */
248 if (optind == mac) {
249 Usage(); /* never returns */
250 }
251
252
253 init(); /* set up boolean "functions" */
254 /*
255 * loop through files finding functions
256 */
257 for (file_num = optind; file_num < mac; file_num++)
258 find_entries(mav[file_num]);
259
260 if (xflag) {
261 put_entries(head);
262 exit(infile_fail > 0 ? 2 : 0); /* Fix for 1082298 */
263 }
264 if (uflag) {
265 for (i = 1; i < mac; i++) {
266 (void) sprintf(cmd,
267 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
268 outfile, mav[i], outfile);
269 (void) system(cmd);
270 }
271 aflag++;
272 }
273 outf = fopen(outfile, aflag ? "a" : "w");
274 if (outf == NULL) {
275 perror(outfile);
276 exit(1);
277 }
278 put_entries(head);
279 (void) fclose(outf);
280 if (uflag) {
281 (void) sprintf(cmd, "sort %s -o %s", outfile, outfile);
282 (void) system(cmd);
283 }
284 return (infile_fail > 0 ? 2 : 0); /* Fix for #1082298 */
285 }
286
287 /*
288 * This routine sets up the boolean psuedo-functions which work
289 * by seting boolean flags dependent upon the corresponding character
290 * Every char which is NOT in that string is not a white char. Therefore,
291 * all of the array "_wht" is set to FALSE, and then the elements
292 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
293 * of a char is TRUE if it is the string "white", else FALSE.
294 */
295 static void
init(void)296 init(void)
297 {
298 char *sp;
299 int i;
300
301 for (i = 0; i < 0177; i++) {
302 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
303 _opr[i] = FALSE; /* initialize boolean */
304 /* array of operator symbols */
305 _gd[i] = TRUE;
306 }
307 for (sp = white; *sp; sp++)
308 _wht[*sp] = TRUE;
309 for (sp = endtk; *sp; sp++)
310 _etk[*sp] = TRUE;
311 for (sp = intk; *sp; sp++)
312 _itk[*sp] = TRUE;
313 for (sp = begtk; *sp; sp++)
314 _btk[*sp] = TRUE;
315
316 /* mark overloadable operator symbols */
317 for (sp = oprtk; *sp; sp++)
318 _opr[*sp] = TRUE;
319
320 for (sp = notgd; *sp; sp++)
321 _gd[*sp] = FALSE;
322 }
323
324 /*
325 * This routine opens the specified file and calls the function
326 * which finds the function and type definitions.
327 */
328 static void
find_entries(char * file)329 find_entries(char *file)
330 {
331 char *cp;
332 struct stat st;
333
334 /* skip anything that isn't a regular file */
335 if (stat(file, &st) == 0 && !S_ISREG(st.st_mode))
336 return;
337
338 if ((inf = fopen(file, "r")) == NULL) {
339 perror(file);
340 infile_fail++; /* Count bad opens. ID #1082298 */
341 return;
342 }
343 curfile = savestr(file);
344 lineno = 0;
345 cp = strrchr(file, '.');
346 /* .l implies lisp or lex source code */
347 if (cp && cp[1] == 'l' && cp[2] == '\0') {
348 if (strchr(";([", first_char()) != NULL) /* lisp */
349 {
350 L_funcs(inf);
351 (void) fclose(inf);
352 return;
353 } else { /* lex */
354 /*
355 * throw away all the code before the second "%%"
356 */
357 toss_yysec();
358 getaline(lineftell);
359 pfnote("yylex", lineno, TRUE);
360 toss_yysec();
361 C_entries();
362 (void) fclose(inf);
363 return;
364 }
365 }
366 /* .y implies a yacc file */
367 if (cp && cp[1] == 'y' && cp[2] == '\0') {
368 toss_yysec();
369 Y_entries();
370 C_entries();
371 (void) fclose(inf);
372 return;
373 }
374
375 /*
376 * Add in file name extension support for Sun C++ which
377 * permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.)
378 */
379
380 /* if not a .c, .C, .cc, .cxx or .h file, try fortran */
381 if (cp && (cp[1] != 'C' && cp[1] != 'c' && cp[1] != 'h') &&
382 cp[2] == '\0' && (strcmp(cp, ".cc") == 0) &&
383 (strcmp(cp, ".cxx") == 0)) {
384 if (PF_funcs(inf) != 0) {
385 (void) fclose(inf);
386 return;
387 }
388 rewind(inf); /* no fortran tags found, try C */
389 }
390 C_entries();
391 (void) fclose(inf);
392 }
393
394 static void
pfnote(char * name,int ln,bool f)395 pfnote(char *name, int ln, bool f)
396 {
397 char *fp;
398 NODE *np;
399 char *nametk; /* hold temporary tokens from name */
400 char nbuf[BUFSIZ];
401
402 if ((np = malloc(sizeof (NODE))) == NULL) {
403 (void) fprintf(stderr,
404 gettext("ctags: too many entries to sort\n"));
405 put_entries(head);
406 free_tree(head);
407 head = np = (NODE *) malloc(sizeof (NODE));
408 }
409 if (xflag == 0 && (strcmp(name, "main") == 0)) {
410 fp = strrchr(curfile, '/');
411
412 if (fp == 0)
413 fp = curfile;
414 else
415 fp++;
416 (void) sprintf(nbuf, "M%s", fp);
417 fp = strrchr(nbuf, '.');
418 /* Chop off .cc and .cxx as well as .c, .h, etc */
419 if (fp && ((fp[2] == 0) || (fp[2] == 'c' && fp[3] == 0) ||
420 (fp[3] == 'x' && fp[4] == 0)))
421 *fp = 0;
422 name = nbuf;
423 }
424
425 /* remove in-between blanks operator function tags */
426 if (strchr(name, ' ') != NULL)
427 {
428 (void) strcpy(name, strtok(name, " "));
429 while ((nametk = strtok(0, " ")) != NULL)
430 (void) strcat(name, nametk);
431 }
432 np->entry = savestr(name);
433 np->file = curfile;
434 np->f = f;
435 np->lno = ln;
436 np->left = np->right = 0;
437 if (xflag == 0) {
438 lbuf[50] = 0;
439 (void) strcat(lbuf, "$");
440 lbuf[50] = 0;
441 }
442 np->pat = savestr(lbuf);
443 if (head == NULL)
444 head = np;
445 else
446 add_node(np, head);
447 }
448
449 /*
450 * This routine finds functions and typedefs in C syntax and adds them
451 * to the list.
452 */
453 static void
C_entries(void)454 C_entries(void)
455 {
456 int c;
457 char *token, *tp;
458 bool incomm, inquote, inchar, midtoken, isoperator, optfound;
459 int level;
460 char *sp;
461 char tok[BUFSIZ];
462 long int tokftell;
463
464 number = gotone = midtoken = inquote = inchar =
465 incomm = isoperator = optfound = FALSE;
466
467 level = 0;
468 sp = tp = token = line;
469 lineno++;
470 lineftell = tokftell = ftell(inf);
471 for (;;) {
472 *sp = c = getc(inf);
473 if (feof(inf))
474 break;
475 if (c == '\n') {
476 lineftell = ftell(inf);
477 lineno++;
478 } else if (c == '\\') {
479 c = *++sp = getc(inf);
480 if ((c == '\n') || (c == EOF)) { /* c == EOF, 1091005 */
481 lineftell = ftell(inf);
482 lineno++;
483 c = ' ';
484 }
485 } else if (incomm) {
486 if (c == '*') {
487 while ((*++sp = c = getc(inf)) == '*')
488 continue;
489
490 /* c == EOF 1091005 */
491 if ((c == '\n') || (c == EOF)) {
492 lineftell = ftell(inf);
493 lineno++;
494 }
495
496 if (c == '/')
497 incomm = FALSE;
498 }
499 } else if (inquote) {
500 /*
501 * Too dumb to know about \" not being magic, but
502 * they usually occur in pairs anyway.
503 */
504 if (c == '"')
505 inquote = FALSE;
506 continue;
507 } else if (inchar) {
508 if (c == '\'')
509 inchar = FALSE;
510 continue;
511 } else if (midtoken == TRUE) { /* if white space omitted */
512 goto dotoken;
513 } else switch (c) {
514 case '"':
515 inquote = TRUE;
516 continue;
517 case '\'':
518 inchar = TRUE;
519 continue;
520 case '/':
521 *++sp = c = getc(inf);
522 /* Handles the C++ comment token "//" */
523 if (c == '*')
524 incomm = TRUE;
525 else if (c == '/') {
526 /*
527 * Skip over all the characters after
528 * "//" until a newline character. Now also
529 * includes fix for 1091005, check for EOF.
530 */
531 do {
532 c = getc(inf);
533 /* 1091005: */
534 } while ((c != '\n') && (c != EOF));
535
536
537 /*
538 * Fixed bugid 1030014
539 * Return the current position of the
540 * file after the newline.
541 */
542 lineftell = ftell(inf);
543 lineno++;
544 *--sp = c;
545 }
546 else
547 (void) ungetc(*sp, inf);
548 continue;
549 case '#':
550 if (sp == line)
551 number = TRUE;
552 continue;
553 case '{':
554 if ((tydef == begin_rec) || (tydef == begin_tag)) {
555 tydef = middle;
556 }
557 level++;
558 continue;
559 case '}':
560 /*
561 * Heuristic for function or structure end;
562 * common for #ifdef/#else blocks to add extra "{"
563 */
564 if (sp == line)
565 level = 0; /* reset */
566 else
567 level--;
568 if (!level && tydef == middle) {
569 tydef = end;
570 }
571 if (!level && tydef == none) /* Fix for #1034126 */
572 goto dotoken;
573 continue;
574 }
575
576 dotoken:
577
578
579 if (!level && !inquote && !incomm && gotone == FALSE) {
580 if (midtoken) {
581 if (endtoken(c)) {
582
583 /*
584 *
585 * ':' +---> ':' -> midtok
586 *
587 * +---> operator{+,-, etc} -> midtok
588 * (continue)
589 * +---> endtok
590 */
591 /*
592 * Enhance operator function support and
593 * fix bugid 1027806
594 *
595 * For operator token, scanning will continue until
596 * '(' is found. Spaces between 'operater' and
597 * 'oprtk' are allowed (e.g. 'operator + ()'), but
598 * will be removed when the actual entry for the tag
599 * is made.
600 * Note that functions of the form 'operator ()(int)'
601 * will be recognized, but 'operator ()' will not,
602 * even though this is legitimate in C.
603 */
604
605 if (optoken(c)) {
606 if (isoperator) {
607 if (optfound) {
608 if (c != '(') {
609 tp++;
610 goto next_char;
611 }
612 } else {
613 if (c != ' ') {
614 optfound = TRUE;
615 }
616 tp++;
617 goto next_char;
618 }
619 } else {
620 /* start: this code shifted left for cstyle */
621 char *backptr = tp - 7;
622 if (strncmp(backptr, "operator", 8) == 0) {
623 /* This is an overloaded operator */
624 isoperator = TRUE;
625 if (c != ' ') {
626 optfound = TRUE;
627 }
628
629 tp++;
630 goto next_char;
631 } else if (c == '~') {
632 /* This is a destructor */
633 tp++;
634 goto next_char;
635 }
636 /* end: above code shifted left for cstyle */
637 }
638 } else if (c == ':') {
639 if ((*++sp = getc(inf)) == ':') {
640 tp += 2;
641 c = *sp;
642 goto next_char;
643 } else {
644 (void) ungetc (*sp, inf);
645 --sp;
646 }
647 }
648
649 /* start: this code shifted left for cstyle */
650 {
651 int f;
652 int pfline = lineno;
653
654 if (start_entry(&sp, token, &f)) {
655 (void) strncpy(tok, token, tp-token+1);
656 tok[tp-token+1] = 0;
657 getaline(tokftell);
658 pfnote(tok, pfline, f);
659 gotone = f; /* function */
660 }
661
662 isoperator = optfound = midtoken = FALSE;
663 token = sp;
664 }
665 /* end: above code shifted left for cstyle */
666 } else if (intoken(c))
667 tp++;
668 } else if (begtoken(c)) {
669 token = tp = sp;
670 midtoken = TRUE;
671 tokftell = lineftell;
672 }
673 }
674 next_char:
675 if (c == ';' && tydef == end) /* clean with typedefs */
676 tydef = none;
677 sp++;
678 /* The "c == }" was added to fix #1034126 */
679 if (c == '\n' ||c == '}'|| sp > &line[sizeof (line) - BUFSIZ]) {
680 tp = token = sp = line;
681 number = gotone = midtoken = inquote =
682 inchar = isoperator = optfound = FALSE;
683 }
684 }
685 }
686
/*
 * start_entry:
 *	Decide whether the token that was just completed is at the start
 *	of a function definition, or is the name introduced by a typedef.
 *
 *	lp	in/out: position in line[] of the character that ended the
 *		token; on return it is moved to the last character this
 *		routine consumed and kept (the final character read is
 *		pushed back onto the input).
 *	token	start of the token text in line[].
 *	f	out: 1 if the entry is a function, 0 if it is a typedef.
 *
 *	Returns nonzero when the token should be tagged.  Further input is
 *	read into line[] as needed (blanks, the parenthesized argument
 *	list), so the '(' ends up in the line buffer.  The typedef state
 *	machine (tydef) is advanced as a side effect.
 */
static int
start_entry(char **lp, char *token, int *f)
{
	char *sp;
	int c;
	static bool found;	/* tokens seen since the last "define" */
	bool firsttok;		/* T if have seen first token in ()'s */
	int bad;

	*f = 1;			/* a function */
	sp = *lp;
	c = *sp;
	bad = FALSE;
	if (!number) {		/* space is not allowed in macro defs */
		/* skip blanks (and newlines) between the name and the '(' */
		while (iswhite(c)) {
			*++sp = c = getc(inf);
			if ((c == '\n') || (c == EOF)) { /* c==EOF, #1091005 */
				lineno++;
				lineftell = ftell(inf);
				/* give up once line[] is nearly full */
				if (sp > &line[sizeof (line) - BUFSIZ])
					goto ret;
			}
		}
	/* the following tries to make it so that a #define a b(c) */
	/* doesn't count as a define of b. */
	} else {
		if (strncmp(token, "define", 6) == 0)
			found = 0;
		else
			found++;
		if (found >= 2) {
			gotone = TRUE;
			/* common "not a tag" exit, also reached by goto */
badone:			bad = TRUE;
			goto ret;
		}
	}
	/* check for the typedef cases */
#ifdef XPG4
	if (strncmp(token, "typedef", 7) == 0) {
#else /* !XPG4 */
	if (tflag && (strncmp(token, "typedef", 7) == 0)) {
#endif /* XPG4 */
		tydef = begin;
		goto badone;
	}
	/* Handles 'class' besides 'struct' etc. */
	if (tydef == begin && ((strncmp(token, "struct", 6) == 0) ||
	    (strncmp(token, "class", 5) == 0) ||
	    (strncmp(token, "union", 5) == 0)||
	    (strncmp(token, "enum", 4) == 0))) {
		tydef = begin_rec;
		goto badone;
	}
	/* any other token right after "typedef": move to state "end" */
	if (tydef == begin) {
		tydef = end;
		goto badone;
	}
	/* token following struct/class/union/enum */
	if (tydef == begin_rec) {
		tydef = begin_tag;
		goto badone;
	}
	/* a token seen in state begin_tag is reported as the typedef */
	if (tydef == begin_tag) {
		tydef = end;
		goto gottydef;	/* Fall through to "tydef==end" */
	}

gottydef:
	if (tydef == end) {
		*f = 0;		/* a typedef, not a function */
		goto ret;
	}
	/* a function name must be followed by '(' */
	if (c != '(')
		goto badone;
	firsttok = FALSE;
	/* read the argument list up to the closing ')' */
	while ((*++sp = c = getc(inf)) != ')') {
		if ((c == '\n') || (c == EOF)) { /* c == EOF Fix for #1091005 */
			lineftell = ftell(inf);
			lineno++;
			if (sp > &line[sizeof (line) - BUFSIZ])
				goto ret;
		}
		/*
		 * This line used to confuse ctags:
		 *	int	(*oldhup)();
		 * This fixes it. A nonwhite char before the first
		 * token, other than a / (in case of a comment in there)
		 * makes this not a declaration.
		 */
		if (begtoken(c) || c == '/')
			firsttok = TRUE;
		else if (!iswhite(c) && !firsttok)
			goto badone;
	}
	/* skip blanks after ')' to find the character that follows it */
	while (iswhite(*++sp = c = getc(inf)))
		if ((c == '\n') || (c == EOF)) { /* c == EOF fix for #1091005 */
			lineno++;
			lineftell = ftell(inf);
			if (sp > &line[sizeof (line) - BUFSIZ])
				break;
		}
ret:
	/* push the last character back; the caller's loop will re-read it */
	*lp = --sp;
	if (c == '\n')
		lineno--;	/* it will be counted again when re-read */
	(void) ungetc(c, inf);
	/* functions additionally need a "good" character after the ')' */
	return (!bad && (!*f || isgood(c)));
	/* hack for typedefs */
}
801
802 /*
803 * Y_entries:
804 * Find the yacc tags and put them in.
805 */
806 static void
807 Y_entries(void)
808 {
809 char *sp, *orig_sp;
810 int brace;
811 bool in_rule = FALSE;
812 size_t toklen;
813 char tok[LINEBUFSIZ];
814
815 brace = 0;
816 getaline(lineftell);
817 pfnote("yyparse", lineno, TRUE);
818 while (fgets(line, sizeof (line), inf) != NULL) {
819 for (sp = line; *sp; sp++) {
820 switch (*sp) {
821 case '\n':
822 lineno++;
823 /* FALLTHROUGH */
824 case ' ':
825 case '\t':
826 case '\f':
827 case '\r':
828 break;
829 case '"':
830 case '\'': {
831 char start = *sp;
832 sp++;
833
834 while ((*sp != '\0') && (*sp != start)) {
835 if (*sp == '\\')
836 sp++; /* Skip escaped thing */
837 sp++;
838 }
839
840 if (*sp == '\0')
841 sp--;
842 break;
843 }
844 case '/':
845 if (*++sp == '*')
846 sp = toss_comment(sp);
847 else
848 --sp;
849 break;
850 case '{':
851 brace++;
852 break;
853 case '}':
854 brace--;
855 break;
856 case '%':
857 if (sp[1] == '%' && sp == line)
858 return;
859 break;
860 case '|':
861 case ';':
862 in_rule = FALSE;
863 break;
864 default:
865 if (brace == 0 && !in_rule && (isalpha(*sp) ||
866 *sp == '.' ||
867 *sp == '_')) {
868 orig_sp = sp;
869 ++sp;
870 while (isalnum(*sp) || *sp == '_' ||
871 *sp == '.')
872 sp++;
873 toklen = sp - orig_sp;
874 while (isspace(*sp))
875 sp++;
876 if (*sp == ':' || (*sp == '\0' &&
877 first_char() == ':')) {
878 (void) strncpy(tok,
879 orig_sp, toklen);
880 tok[toklen] = '\0';
881 (void) strcpy(lbuf, line);
882 lbuf[strlen(lbuf) - 1] = '\0';
883 pfnote(tok, lineno, TRUE);
884 in_rule = TRUE;
885 /*
886 * if we read NUL, leave it so
887 * we read the next line
888 */
889 if (*sp == '\0')
890 sp--;
891 } else {
892 sp--;
893 }
894 }
895 break;
896 }
897 }
898 }
899 }
900
901 static char *
902 toss_comment(char *start)
903 {
904 char *sp;
905
906 /*
907 * first, see if the end-of-comment is on the same line
908 */
909 do {
910 while ((sp = strchr(start, '*')) != NULL)
911 if (sp[1] == '/')
912 return (++sp);
913 else
914 start = (++sp);
915 start = line;
916 lineno++;
917 } while (fgets(line, sizeof (line), inf) != NULL);
918
919 /*
920 * running this through lint revealed that the original version
921 * of this routine didn't explicitly return something; while
922 * the return value was always used!. so i've added this
923 * next line.
924 */
925 return (sp);
926 }
927
928 static void
929 getaline(long int where)
930 {
931 long saveftell = ftell(inf);
932 char *cp;
933
934 (void) fseek(inf, where, 0);
935 (void) fgets(lbuf, sizeof (lbuf), inf);
936 cp = strrchr(lbuf, '\n');
937 if (cp)
938 *cp = 0;
939 (void) fseek(inf, saveftell, 0);
940 }
941
942 static void
943 free_tree(NODE *node)
944 {
945 NODE *next;
946 while (node) {
947 free_tree(node->right);
948 next = node->left;
949 free(node);
950 node = next;
951 }
952 }
953
954 static void
955 add_node(NODE *node, NODE *cur_node)
956 {
957 int dif;
958
959 dif = strcmp(node->entry, cur_node->entry);
960 if (dif == 0) {
961 if (node->file == cur_node->file) {
962 if (!wflag) {
963 (void) fprintf(stderr,
964 gettext("Duplicate entry in file %s, line %d: %s\n"),
965 node->file, lineno, node->entry);
966 (void) fprintf(stderr,
967 gettext("Second entry ignored\n"));
968 }
969 return;
970 }
971 if (!cur_node->been_warned)
972 if (!wflag) {
973 (void) fprintf(stderr, gettext("Duplicate "
974 "entry in files %s and %s: %s "
975 "(Warning only)\n"),
976 node->file, cur_node->file,
977 node->entry);
978 }
979 cur_node->been_warned = TRUE;
980 return;
981 }
982
983 if (dif < 0) {
984 if (cur_node->left != NULL)
985 add_node(node, cur_node->left);
986 else
987 cur_node->left = node;
988 return;
989 }
990 if (cur_node->right != NULL)
991 add_node(node, cur_node->right);
992 else
993 cur_node->right = node;
994 }
995
996 static void
997 put_entries(NODE *node)
998 {
999 char *sp;
1000
1001 if (node == NULL)
1002 return;
1003 put_entries(node->left);
1004
1005 /*
1006 * while the code in the following #ifdef section could be combined,
1007 * it's explicitly separated here to make maintainance easier.
1008 */
1009 #ifdef XPG4
1010 /*
1011 * POSIX 2003: we no longer have a "-t" flag; the logic is
1012 * automatically assumed to be "turned on" here.
1013 */
1014 if (xflag == 0) {
1015 (void) fprintf(outf, "%s\t%s\t%c^",
1016 node->entry, node->file, searchar);
1017 for (sp = node->pat; *sp; sp++)
1018 if (*sp == '\\')
1019 (void) fprintf(outf, "\\\\");
1020 else if (*sp == searchar)
1021 (void) fprintf(outf, "\\%c", searchar);
1022 else
1023 (void) putc(*sp, outf);
1024 (void) fprintf(outf, "%c\n", searchar);
1025 } else if (vflag)
1026 (void) fprintf(stdout, "%s %s %d\n",
1027 node->entry, node->file, (node->lno+63)/64);
1028 else
1029 (void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1030 node->entry, node->lno, node->file, node->pat);
1031 #else /* XPG4 */
1032 /*
1033 * original way of doing things. "-t" logic is only turned on
1034 * when the user has specified it via a command-line argument.
1035 */
1036 if (xflag == 0)
1037 if (node->f) { /* a function */
1038 (void) fprintf(outf, "%s\t%s\t%c^",
1039 node->entry, node->file, searchar);
1040 for (sp = node->pat; *sp; sp++)
1041 if (*sp == '\\')
1042 (void) fprintf(outf, "\\\\");
1043 else if (*sp == searchar)
1044 (void) fprintf(outf, "\\%c", searchar);
1045 else
1046 (void) putc(*sp, outf);
1047 (void) fprintf(outf, "%c\n", searchar);
1048 } else { /* a typedef; text pattern inadequate */
1049 (void) fprintf(outf, "%s\t%s\t%d\n",
1050 node->entry, node->file, node->lno);
1051 } else if (vflag)
1052 (void) fprintf(stdout, "%s %s %d\n",
1053 node->entry, node->file, (node->lno+63)/64);
1054 else
1055 (void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1056 node->entry, node->lno, node->file, node->pat);
1057 #endif /* XPG4 */
1058 put_entries(node->right);
1059 }
1060
1061
1062 static int
1063 PF_funcs(FILE *fi)
1064 {
1065
1066 pfcnt = 0;
1067 while (fgets(lbuf, sizeof (lbuf), fi)) {
1068 lineno++;
1069 dbp = lbuf;
1070 if (*dbp == '%') dbp++; /* Ratfor escape to fortran */
1071 while (isspace(*dbp))
1072 dbp++;
1073 if (*dbp == 0)
1074 continue;
1075 switch (*dbp |' ') {
1076
1077 case 'i':
1078 if (tail("integer"))
1079 takeprec();
1080 break;
1081 case 'r':
1082 if (tail("real"))
1083 takeprec();
1084 break;
1085 case 'l':
1086 if (tail("logical"))
1087 takeprec();
1088 break;
1089 case 'c':
1090 if (tail("complex") || tail("character"))
1091 takeprec();
1092 break;
1093 case 'd':
1094 if (tail("double")) {
1095 while (isspace(*dbp))
1096 dbp++;
1097 if (*dbp == 0)
1098 continue;
1099 if (tail("precision"))
1100 break;
1101 continue;
1102 }
1103 break;
1104 }
1105 while (isspace(*dbp))
1106 dbp++;
1107 if (*dbp == 0)
1108 continue;
1109 switch (*dbp|' ') {
1110
1111 case 'f':
1112 if (tail("function"))
1113 getit();
1114 continue;
1115 case 's':
1116 if (tail("subroutine"))
1117 getit();
1118 continue;
1119 case 'p':
1120 if (tail("program")) {
1121 getit();
1122 continue;
1123 }
1124 if (tail("procedure"))
1125 getit();
1126 continue;
1127 }
1128 }
1129 return (pfcnt);
1130 }
1131
1132 static int
1133 tail(char *cp)
1134 {
1135 int len = 0;
1136
1137 while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
1138 cp++, len++;
1139 if (*cp == 0) {
1140 dbp += len;
1141 return (1);
1142 }
1143 return (0);
1144 }
1145
1146 static void
1147 takeprec(void)
1148 {
1149 while (isspace(*dbp))
1150 dbp++;
1151 if (*dbp != '*')
1152 return;
1153 dbp++;
1154 while (isspace(*dbp))
1155 dbp++;
1156 if (!isdigit(*dbp)) {
1157 --dbp; /* force failure */
1158 return;
1159 }
1160 do
1161 dbp++;
1162 while (isdigit(*dbp));
1163 }
1164
1165 static void
1166 getit(void)
1167 {
1168 char *cp;
1169 char c;
1170 char nambuf[LINEBUFSIZ];
1171
1172 for (cp = lbuf; *cp; cp++)
1173 ;
1174 *--cp = 0; /* zap newline */
1175 while (isspace(*dbp))
1176 dbp++;
1177 if (*dbp == 0 || !isalpha(*dbp) || !isascii(*dbp))
1178 return;
1179 for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
1180 continue;
1181 c = cp[0];
1182 cp[0] = 0;
1183 (void) strcpy(nambuf, dbp);
1184 cp[0] = c;
1185 pfnote(nambuf, lineno, TRUE);
1186 pfcnt++;
1187 }
1188
/*
 * savestr:
 *	Return a freshly allocated copy of the NUL-terminated string "cp".
 *	The caller owns the copy.  Never returns NULL: if memory cannot be
 *	obtained, the failure is reported and the program exits with
 *	status 1.
 */
static char *
savestr(char *cp)
{
	size_t	len;
	char	*dp;

	len = strlen(cp) + 1;		/* include the terminator */
	if ((dp = malloc(len)) == NULL) {
		perror("ctags");
		exit(1);
	}
	(void) memcpy(dp, cp, len);

	return (dp);
}
1201
1202 /*
1203 * lisp tag functions
1204 * just look for (def or (DEF
1205 */
1206
1207 static void
1208 L_funcs(FILE *fi)
1209 {
1210 int special;
1211
1212 pfcnt = 0;
1213 while (fgets(lbuf, sizeof (lbuf), fi)) {
1214 lineno++;
1215 dbp = lbuf;
1216 if (dbp[0] == '(' &&
1217 (dbp[1] == 'D' || dbp[1] == 'd') &&
1218 (dbp[2] == 'E' || dbp[2] == 'e') &&
1219 (dbp[3] == 'F' || dbp[3] == 'f')) {
1220 dbp += 4;
1221 if (striccmp(dbp, "method") == 0 ||
1222 striccmp(dbp, "wrapper") == 0 ||
1223 striccmp(dbp, "whopper") == 0)
1224 special = TRUE;
1225 else
1226 special = FALSE;
1227 while (!isspace(*dbp))
1228 dbp++;
1229 while (isspace(*dbp))
1230 dbp++;
1231 L_getit(special);
1232 }
1233 }
1234 }
1235
1236 static void
1237 L_getit(int special)
1238 {
1239 char *cp;
1240 char c;
1241 char nambuf[LINEBUFSIZ];
1242
1243 for (cp = lbuf; *cp; cp++)
1244 continue;
1245 *--cp = 0; /* zap newline */
1246 if (*dbp == 0)
1247 return;
1248 if (special) {
1249 if ((cp = strchr(dbp, ')')) == NULL)
1250 return;
1251 while (cp >= dbp && *cp != ':')
1252 cp--;
1253 if (cp < dbp)
1254 return;
1255 dbp = cp;
1256 while (*cp && *cp != ')' && *cp != ' ')
1257 cp++;
1258 }
1259 else
1260 for (cp = dbp + 1; *cp && *cp != '(' && *cp != ' '; cp++)
1261 continue;
1262 c = cp[0];
1263 cp[0] = 0;
1264 (void) strcpy(nambuf, dbp);
1265 cp[0] = c;
1266 pfnote(nambuf, lineno, TRUE);
1267 pfcnt++;
1268 }
1269
/*
 * striccmp:
 *	Compare two strings over the length of the second, ignoring
 *	case distinctions.  If they are the same, return 0.  If they
 *	are different, return the difference of the first two different
 *	characters.  It is assumed that the pattern (second string) is
 *	completely lower case.
 *
 *	Characters are handed to the <ctype.h> functions as unsigned char,
 *	as those functions require; the comparison itself still uses the
 *	plain char values.
 */
static int
striccmp(char *str, char *pat)
{
	int	c1;

	while (*pat) {
		if (isupper((unsigned char)*str))
			c1 = tolower((unsigned char)*str);
		else
			c1 = *str;
		if (c1 != *pat)
			return (c1 - *pat);
		pat++;
		str++;
	}
	return (0);
}
1295
1296 /*
1297 * first_char:
1298 * Return the first non-blank character in the file. After
1299 * finding it, rewind the input file so we start at the beginning
1300 * again.
1301 */
1302 static int
1303 first_char(void)
1304 {
1305 int c;
1306 long off;
1307
1308 off = ftell(inf);
1309 while ((c = getc(inf)) != EOF)
1310 if (!isspace(c) && c != '\r') {
1311 (void) fseek(inf, off, 0);
1312 return (c);
1313 }
1314 (void) fseek(inf, off, 0);
1315 return (EOF);
1316 }
1317
1318 /*
1319 * toss_yysec:
1320 * Toss away code until the next "%%" line.
1321 */
1322 static void
1323 toss_yysec(void)
1324 {
1325 char buf[BUFSIZ];
1326
1327 for (;;) {
1328 lineftell = ftell(inf);
1329 if (fgets(buf, BUFSIZ, inf) == NULL)
1330 return;
1331 lineno++;
1332 if (strncmp(buf, "%%", 2) == 0)
1333 return;
1334 }
1335 }
1336
/*
 * Usage:
 *	Print the command synopsis on stderr and exit with status 1.
 *	The XPG4 build does not advertise the -t flag.
 */
static void
Usage(void)
{
	const char	*synopsis;

#ifdef XPG4
	synopsis = gettext("Usage:\tctags [-aBFuvw] "
	    "[-f tagsfile] file ...\n");
#else	/* !XPG4 */
	synopsis = gettext("Usage:\tctags [-aBFtuvw] "
	    "[-f tagsfile] file ...\n");
#endif	/* XPG4 */

	(void) fprintf(stderr, synopsis);
	(void) fprintf(stderr, gettext("OR:\tctags [-x] file ...\n"));
	exit(1);
}
1349
1350
1351 /*
1352 * parseargs(): modify the args
1353 * the purpose of this routine is to transform any ancient argument
1354 * usage into a format which is acceptable to getopt(3C), so that we
1355 * retain backwards Solaris 2.[0-4] compatibility.
1356 *
1357 * This routine allows us to make full use of getopts, without any
1358 * funny argument processing in main().
1359 *
1360 * The other alternative would be to hand-craft the processed arguments
1361 * during and after getopt(3C) - which usually leads to uglier code
1362 * in main(). I've opted to keep the ugliness isolated down here,
1363 * instead of in main().
1364 *
1365 * In a nutshell, if the user has used the old Solaris syntax of:
1366 * ctags [-aBFtuvwx] [-f tagsfile] filename ...
1367 * We simply change this into:
1368 * ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file...
1369 *
1370 * If the user has specified the new getopt(3C) syntax, we merely
1371 * copy that into our modified argument space.
1372 */
1373 static void
1374 parseargs(int ac, char **av)
1375 {
1376 int i; /* current argument */
1377 int a; /* used to parse combined arguments */
1378 int fflag; /* 1 = we're only parsing filenames */
1379 size_t sz; /* size of the argument */
1380 size_t mav_sz; /* size of our psuedo argument space */
1381
1382 i = mac = fflag = 0; /* proper initializations */
1383
1384 mav_sz = ((ac + 1) * sizeof (char *));
1385 if ((mav = malloc(mav_sz)) == (char **)NULL) {
1386 perror("Can't malloc argument space");
1387 exit(1);
1388 }
1389
1390 /* for each argument, see if we need to change things: */
1391 for (; (av[i] != NULL) && (av[i][0] != '\0'); i++) {
1392
1393 if (strcmp(av[i], "--") == 0) {
1394 fflag = 1; /* just handle filenames now */
1395 }
1396
1397 sz = strlen(&av[i][0]); /* get this arg's size */
1398
1399 /*
1400 * if the argument starts with a "-", and has more than
1401 * 1 flag, then we have to search through each character,
1402 * and separate any flags which have been combined.
1403 *
1404 * so, if we've found a "-" string which needs separating:
1405 */
1406 if (fflag == 0 && /* not handling filename args */
1407 av[i][0] == '-' && /* and this is a flag */
1408 sz > 2) { /* and there's more than 1 flag */
1409 /* then for each flag after the "-" sign: */
1410 for (a = 1; av[i][a]; a++) {
1411 /* copy the flag into mav space. */
1412 if (a > 1) {
1413 /*
1414 * we need to call realloc() after the
1415 * 1st combined flag, because "ac"
1416 * doesn't include combined args.
1417 */
1418 mav_sz += sizeof (char *);
1419 if ((mav = realloc(mav, mav_sz)) ==
1420 (char **)NULL) {
1421 perror("Can't realloc "
1422 "argument space");
1423 exit(1);
1424 }
1425 }
1426
1427 if ((mav[mac] = malloc((size_t)CPFLAG)) ==
1428 (char *)NULL) {
1429 perror("Can't malloc argument space");
1430 exit(1);
1431 }
1432 (void) sprintf(mav[mac], "-%c", av[i][a]);
1433 ++mac;
1434 }
1435 } else {
1436 /* otherwise, just copy the argument: */
1437 if ((mav[mac] = malloc(sz + 1)) == (char *)NULL) {
1438 perror("Can't malloc argument space");
1439 exit(1);
1440 }
1441 (void) strcpy(mav[mac], av[i]);
1442 ++mac;
1443 }
1444 }
1445
1446 mav[mac] = (char *)NULL;
1447 }
1448