1 /* 2 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* Copyright (c) 1988 AT&T */ 6 /* All Rights Reserved */ 7 8 /* 9 * Copyright (c) 1980 Regents of the University of California. 10 * All rights reserved. The Berkeley software License Agreement 11 * specifies the terms and conditions for redistribution. 12 */ 13 14 /* 15 * Modify ctags to handle C++ in C_entries(), etc: 16 * - Handles C++ comment token "//" 17 * - Handles C++ scope operator "::". 18 * This helps to distinguish between xyz() 19 * definition and X::xyz() definition. 20 * - Recognizes C++ reserved word "class" in typedef processing 21 * (for "-t" option) 22 * - Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx. 23 * - Handles overloaded unary/binary operator names 24 * Doesn't handle yet: 25 * - inline functions in class definition (currently they get 26 * swallowed within a class definition) 27 * - Tags with scope operator :: with spaces in between, 28 * e.g. classz ::afunc 29 * 30 * Enhance operator functions support: 31 * - Control flow involving operator tokens scanning are 32 * consistent with that of other function tokens - original 33 * hacking method for 2.0 is removed. This will accurately 34 * identify tags for declarations of the form 'operator+()' 35 * (bugid 1027806) as well as allowing spaces in between 36 * 'operator' and 'oprtk', e.g. 'operator + ()'. 37 * 38 */ 39 40 #ifndef lint 41 char copyright[] = "@(#) Copyright (c) 1980 Regents of the University of " 42 "California.\nAll rights reserved.\n"; 43 #endif 44 45 #include <stdio.h> 46 #include <ctype.h> 47 #include <locale.h> 48 #include <unistd.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include <limits.h> 52 #include <sys/types.h> 53 #include <sys/stat.h> 54 55 /* 56 * ctags: create a tags file 57 */ 58 59 #define bool char 60 61 #define TRUE (1) 62 #define FALSE (0) 63 64 #define CPFLAG 3 /* # of bytes in a flag */ 65 66 #define iswhite(arg) (_wht[arg]) /* T if char is white */ 67 #define begtoken(arg) (_btk[arg]) /* T if char can start token */ 68 #define intoken(arg) (_itk[arg]) /* T if char can be in token */ 69 #define endtoken(arg) (_etk[arg]) /* T if char ends tokens */ 70 #define isgood(arg) (_gd[arg]) /* T if char can be after ')' */ 71 72 #define optoken(arg) (_opr[arg]) /* T if char can be */ 73 /* an overloaded operator token */ 74 75 #define max(I1, I2) (I1 > I2 ? I1 : I2) 76 77 struct nd_st { /* sorting structure */ 78 char *entry; /* function or type name */ 79 char *file; /* file name */ 80 bool f; /* use pattern or line no */ 81 int lno; /* for -x option */ 82 char *pat; /* search pattern */ 83 bool been_warned; /* set if noticed dup */ 84 struct nd_st *left, *right; /* left and right sons */ 85 }; 86 87 long ftell(); 88 typedef struct nd_st NODE; 89 90 static bool 91 number, /* T if on line starting with # */ 92 gotone, /* found a func already on line */ 93 /* boolean "func" (see init) */ 94 _wht[0177], _etk[0177], _itk[0177], _btk[0177], _gd[0177]; 95 96 /* boolean array for overloadable operator symbols */ 97 static bool _opr[0177]; 98 99 /* 100 * typedefs are recognized using a simple finite automata, 101 * tydef is its state variable. 102 */ 103 typedef enum {none, begin, begin_rec, begin_tag, middle, end } TYST; 104 105 static TYST tydef = none; 106 107 static char searchar = '/'; /* use /.../ searches */ 108 109 static int lineno; /* line number of current line */ 110 static char 111 line[4*BUFSIZ], /* current input line */ 112 *curfile, /* current input file name */ 113 *outfile = "tags", /* output file */ 114 *white = " \f\t\n", /* white chars */ 115 *endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?", 116 /* token ending chars */ 117 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz", 118 /* token starting chars */ 119 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" 120 "0123456789", 121 /* valid in-token chars */ 122 *notgd = ",;"; /* non-valid after-function chars */ 123 124 static char *oprtk = " =-+%*/&|^~!<>[]()"; /* overloadable operators */ 125 126 static int file_num; /* current file number */ 127 static int aflag; /* -a: append to tags */ 128 129 #ifndef XPG4 /* XPG4: handle typedefs by default */ 130 static int tflag; /* -t: create tags for typedefs */ 131 #endif /* !XPG4 */ 132 133 static int uflag; /* -u: update tags */ 134 static int wflag; /* -w: suppress warnings */ 135 static int vflag; /* -v: create vgrind style index output */ 136 static int xflag; /* -x: create cxref style output */ 137 138 static char lbuf[LINE_MAX]; 139 140 static FILE 141 *inf, /* ioptr for current input file */ 142 *outf; /* ioptr for tags file */ 143 144 static long lineftell; /* ftell after getc( inf ) == '\n' */ 145 146 static NODE *head; /* the head of the sorted binary tree */ 147 148 #ifdef __STDC__ 149 char *strrchr(), *strchr(); 150 #else 151 char *rindex(), *index(); 152 #endif 153 154 static int infile_fail; /* Count of bad opens. Fix bug ID #1082298 */ 155 156 static char *dbp = lbuf; 157 static int pfcnt; 158 159 static int mac; /* our modified argc, after parseargs() */ 160 static char **mav; /* our modified argv, after parseargs() */ 161 162 163 /* our local functions: */ 164 static void init(); 165 static void find_entries(char *file); 166 static void pfnote(); 167 static void C_entries(); 168 static int start_entry(char **lp, char *token, int *f); 169 static void Y_entries(); 170 static char *toss_comment(char *start); 171 static void getaline(long int where); 172 static void free_tree(NODE *node); 173 static void add_node(NODE *node, NODE *cur_node); 174 static void put_entries(NODE *node); 175 static int PF_funcs(FILE *fi); 176 static int tail(char *cp); 177 static void takeprec(); 178 static void getit(); 179 static char *savestr(char *cp); 180 static void L_funcs(FILE *fi); 181 static void L_getit(int special); 182 static int striccmp(char *str, char *pat); 183 static int first_char(); 184 static void toss_yysec(); 185 static void Usage(); 186 static void parseargs(int ac, char **av); 187 188 int 189 main(int ac, char *av[]) 190 { 191 int i; 192 char cmd[100]; 193 194 (void) setlocale(LC_ALL, ""); 195 #if !defined(TEXT_DOMAIN) 196 #define TEXT_DOMAIN "SYS_TEST" 197 #endif 198 (void) textdomain(TEXT_DOMAIN); 199 200 parseargs(ac, av); 201 202 while ((i = getopt(mac, mav, "aBFtuvwxf:")) != EOF) { 203 switch (i) { 204 case 'a': /* -a: Append output to existing tags file */ 205 aflag++; 206 break; 207 208 case 'B': /* -B: Use backward search patterns (?...?) */ 209 searchar = '?'; 210 break; 211 212 case 'F': /* -F: Use forward search patterns (/.../) */ 213 searchar = '/'; 214 break; 215 216 case 't': /* -t: Create tags for typedefs. */ 217 /* for XPG4 , we silently ignore "-t". */ 218 #ifndef XPG4 219 tflag++; 220 #endif /* !XPG4 */ 221 break; 222 223 case 'u': /* -u: Update the specified tags file */ 224 uflag++; 225 break; 226 227 case 'v': /* -v: Index listing on stdout */ 228 vflag++; 229 xflag++; 230 break; 231 232 case 'w': /* -w: Suppress warnings */ 233 wflag++; 234 break; 235 236 case 'x': /* -x: Produce a simple index */ 237 xflag++; 238 break; 239 240 case 'f': /* -f tagsfile: output to tagsfile */ 241 outfile = strdup(optarg); 242 break; 243 244 default: 245 Usage(); /* never returns */ 246 break; 247 } 248 } 249 250 /* if we didn't specify any source code to parse, complain and die. */ 251 if (optind == mac) { 252 Usage(); /* never returns */ 253 } 254 255 256 init(); /* set up boolean "functions" */ 257 /* 258 * loop through files finding functions 259 */ 260 for (file_num = optind; file_num < mac; file_num++) 261 find_entries(mav[file_num]); 262 263 if (xflag) { 264 put_entries(head); 265 exit(infile_fail > 0 ? 2 : 0); /* Fix for 1082298 */ 266 } 267 if (uflag) { 268 for (i = 1; i < mac; i++) { 269 (void) sprintf(cmd, 270 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", 271 outfile, mav[i], outfile); 272 (void) system(cmd); 273 } 274 aflag++; 275 } 276 outf = fopen(outfile, aflag ? "a" : "w"); 277 if (outf == NULL) { 278 perror(outfile); 279 exit(1); 280 } 281 put_entries(head); 282 (void) fclose(outf); 283 if (uflag) { 284 (void) sprintf(cmd, "sort %s -o %s", outfile, outfile); 285 (void) system(cmd); 286 } 287 return (infile_fail > 0 ? 2 : 0); /* Fix for #1082298 */ 288 } 289 290 /* 291 * This routine sets up the boolean psuedo-functions which work 292 * by seting boolean flags dependent upon the corresponding character 293 * Every char which is NOT in that string is not a white char. Therefore, 294 * all of the array "_wht" is set to FALSE, and then the elements 295 * subscripted by the chars in "white" are set to TRUE. Thus "_wht" 296 * of a char is TRUE if it is the string "white", else FALSE. 297 */ 298 static void 299 init() 300 { 301 char *sp; 302 int i; 303 304 for (i = 0; i < 0177; i++) { 305 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE; 306 _opr[i] = FALSE; /* initialize boolean */ 307 /* array of operator symbols */ 308 _gd[i] = TRUE; 309 } 310 for (sp = white; *sp; sp++) 311 _wht[*sp] = TRUE; 312 for (sp = endtk; *sp; sp++) 313 _etk[*sp] = TRUE; 314 for (sp = intk; *sp; sp++) 315 _itk[*sp] = TRUE; 316 for (sp = begtk; *sp; sp++) 317 _btk[*sp] = TRUE; 318 319 /* mark overloadable operator symbols */ 320 for (sp = oprtk; *sp; sp++) 321 _opr[*sp] = TRUE; 322 323 for (sp = notgd; *sp; sp++) 324 _gd[*sp] = FALSE; 325 } 326 327 /* 328 * This routine opens the specified file and calls the function 329 * which finds the function and type definitions. 330 */ 331 static void 332 find_entries(file) 333 char *file; 334 { 335 char *cp; 336 struct stat st; 337 338 /* skip anything that isn't a regular file */ 339 if (stat(file, &st) == 0 && !S_ISREG(st.st_mode)) 340 return; 341 342 if ((inf = fopen(file, "r")) == NULL) { 343 perror(file); 344 infile_fail++; /* Count bad opens. ID #1082298 */ 345 return; 346 } 347 curfile = savestr(file); 348 lineno = 0; 349 #ifdef __STDC__ 350 cp = strrchr(file, '.'); 351 #else 352 cp = rindex(file, '.'); 353 #endif 354 /* .l implies lisp or lex source code */ 355 if (cp && cp[1] == 'l' && cp[2] == '\0') { 356 #ifdef __STDC__ 357 if (strchr(";([", first_char()) != NULL) /* lisp */ 358 #else 359 if (index(";([", first_char()) != NULL) /* lisp */ 360 #endif 361 { 362 L_funcs(inf); 363 (void) fclose(inf); 364 return; 365 } else { /* lex */ 366 /* 367 * throw away all the code before the second "%%" 368 */ 369 toss_yysec(); 370 getaline(lineftell); 371 pfnote("yylex", lineno, TRUE); 372 toss_yysec(); 373 C_entries(); 374 (void) fclose(inf); 375 return; 376 } 377 } 378 /* .y implies a yacc file */ 379 if (cp && cp[1] == 'y' && cp[2] == '\0') { 380 toss_yysec(); 381 Y_entries(); 382 C_entries(); 383 (void) fclose(inf); 384 return; 385 } 386 387 /* 388 * Add in file name extension support for Sun C++ which 389 * permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.) 390 */ 391 392 /* if not a .c, .C, .cc, .cxx or .h file, try fortran */ 393 if (cp && (cp[1] != 'C' && cp[1] != 'c' && cp[1] != 'h') && 394 cp[2] == '\0' && (strcmp(cp, ".cc") == 0) && 395 (strcmp(cp, ".cxx") == 0)) { 396 if (PF_funcs(inf) != 0) { 397 (void) fclose(inf); 398 return; 399 } 400 rewind(inf); /* no fortran tags found, try C */ 401 } 402 C_entries(); 403 (void) fclose(inf); 404 } 405 406 static void 407 pfnote(name, ln, f) 408 char *name; 409 int ln; 410 bool f; /* f == TRUE when function */ 411 { 412 char *fp; 413 NODE *np; 414 char *nametk; /* hold temporary tokens from name */ 415 char nbuf[BUFSIZ]; 416 417 if ((np = malloc(sizeof (NODE))) == NULL) { 418 (void) fprintf(stderr, 419 gettext("ctags: too many entries to sort\n")); 420 put_entries(head); 421 free_tree(head); 422 head = np = (NODE *) malloc(sizeof (NODE)); 423 } 424 if (xflag == 0 && (strcmp(name, "main") == 0)) { 425 #ifdef __STDC__ 426 fp = strrchr(curfile, '/'); 427 #else 428 fp = rindex(curfile, '/'); 429 #endif 430 if (fp == 0) 431 fp = curfile; 432 else 433 fp++; 434 (void) sprintf(nbuf, "M%s", fp); 435 #ifdef __STDC__ 436 fp = strrchr(nbuf, '.'); 437 #else 438 fp = rindex(nbuf, '.'); 439 #endif 440 /* Chop off .cc and .cxx as well as .c, .h, etc */ 441 if (fp && ((fp[2] == 0) || (fp[2] == 'c' && fp[3] == 0) || 442 (fp[3] == 'x' && fp[4] == 0))) 443 *fp = 0; 444 name = nbuf; 445 } 446 447 /* remove in-between blanks operator function tags */ 448 #ifdef __STDC__ 449 if (strchr(name, ' ') != NULL) 450 #else 451 if (index(name, ' ') != NULL) 452 #endif 453 { 454 (void) strcpy(name, strtok(name, " ")); 455 while (nametk = strtok(0, " ")) 456 (void) strcat(name, nametk); 457 } 458 np->entry = savestr(name); 459 np->file = curfile; 460 np->f = f; 461 np->lno = ln; 462 np->left = np->right = 0; 463 if (xflag == 0) { 464 lbuf[50] = 0; 465 (void) strcat(lbuf, "$"); 466 lbuf[50] = 0; 467 } 468 np->pat = savestr(lbuf); 469 if (head == NULL) 470 head = np; 471 else 472 add_node(np, head); 473 } 474 475 /* 476 * This routine finds functions and typedefs in C syntax and adds them 477 * to the list. 478 */ 479 static void 480 C_entries() 481 { 482 int c; 483 char *token, *tp; 484 bool incomm, inquote, inchar, midtoken, isoperator, optfound; 485 int level; 486 char *sp; 487 char tok[BUFSIZ]; 488 long int tokftell; 489 490 number = gotone = midtoken = inquote = inchar = 491 incomm = isoperator = optfound = FALSE; 492 493 level = 0; 494 sp = tp = token = line; 495 lineno++; 496 lineftell = tokftell = ftell(inf); 497 for (;;) { 498 *sp = c = getc(inf); 499 if (feof(inf)) 500 break; 501 if (c == '\n') { 502 lineftell = ftell(inf); 503 lineno++; 504 } else if (c == '\\') { 505 c = *++sp = getc(inf); 506 if ((c == '\n') || (c == EOF)) { /* c == EOF, 1091005 */ 507 lineftell = ftell(inf); 508 lineno++; 509 c = ' '; 510 } 511 } else if (incomm) { 512 if (c == '*') { 513 while ((*++sp = c = getc(inf)) == '*') 514 continue; 515 516 /* c == EOF 1091005 */ 517 if ((c == '\n') || (c == EOF)) { 518 lineftell = ftell(inf); 519 lineno++; 520 } 521 522 if (c == '/') 523 incomm = FALSE; 524 } 525 } else if (inquote) { 526 /* 527 * Too dumb to know about \" not being magic, but 528 * they usually occur in pairs anyway. 529 */ 530 if (c == '"') 531 inquote = FALSE; 532 continue; 533 } else if (inchar) { 534 if (c == '\'') 535 inchar = FALSE; 536 continue; 537 } else if (midtoken == TRUE) { /* if white space omitted */ 538 goto dotoken; 539 } else switch (c) { 540 case '"': 541 inquote = TRUE; 542 continue; 543 case '\'': 544 inchar = TRUE; 545 continue; 546 case '/': 547 *++sp = c = getc(inf); 548 /* Handles the C++ comment token "//" */ 549 if (c == '*') 550 incomm = TRUE; 551 else if (c == '/') { 552 /* 553 * Skip over all the characters after 554 * "//" until a newline character. Now also 555 * includes fix for 1091005, check for EOF. 556 */ 557 do { 558 c = getc(inf); 559 /* 1091005: */ 560 } while ((c != '\n') && (c != EOF)); 561 562 563 /* 564 * Fixed bugid 1030014 565 * Return the current position of the 566 * file after the newline. 567 */ 568 lineftell = ftell(inf); 569 lineno++; 570 *--sp = c; 571 } 572 else 573 (void) ungetc(*sp, inf); 574 continue; 575 case '#': 576 if (sp == line) 577 number = TRUE; 578 continue; 579 case '{': 580 if ((tydef == begin_rec) || (tydef == begin_tag)) { 581 tydef = middle; 582 } 583 level++; 584 continue; 585 case '}': 586 /* 587 * Heuristic for function or structure end; 588 * common for #ifdef/#else blocks to add extra "{" 589 */ 590 if (sp == line) 591 level = 0; /* reset */ 592 else 593 level--; 594 if (!level && tydef == middle) { 595 tydef = end; 596 } 597 if (!level && tydef == none) /* Fix for #1034126 */ 598 goto dotoken; 599 continue; 600 } 601 602 dotoken: 603 604 605 if (!level && !inquote && !incomm && gotone == FALSE) { 606 if (midtoken) { 607 if (endtoken(c)) { 608 609 /* 610 * 611 * ':' +---> ':' -> midtok 612 * 613 * +---> operator{+,-, etc} -> midtok 614 * (continue) 615 * +---> endtok 616 */ 617 /* 618 * Enhance operator function support and 619 * fix bugid 1027806 620 * 621 * For operator token, scanning will continue until 622 * '(' is found. Spaces between 'operater' and 623 * 'oprtk' are allowed (e.g. 'operator + ()'), but 624 * will be removed when the actual entry for the tag 625 * is made. 626 * Note that functions of the form 'operator ()(int)' 627 * will be recognized, but 'operator ()' will not, 628 * even though this is legitimate in C. 629 */ 630 631 if (optoken(c)) { 632 if (isoperator) { 633 if (optfound) { 634 if (c != '(') { 635 tp++; 636 goto next_char; 637 } 638 } else { 639 if (c != ' ') { 640 optfound = TRUE; 641 } 642 tp++; 643 goto next_char; 644 } 645 } else { 646 /* start: this code shifted left for cstyle */ 647 char *backptr = tp - 7; 648 if (strncmp(backptr, "operator", 8) == 0) { 649 /* This is an overloaded operator */ 650 isoperator = TRUE; 651 if (c != ' ') { 652 optfound = TRUE; 653 } 654 655 tp++; 656 goto next_char; 657 } else if (c == '~') { 658 /* This is a destructor */ 659 tp++; 660 goto next_char; 661 } 662 /* end: above code shifted left for cstyle */ 663 } 664 } else if (c == ':') { 665 if ((*++sp = getc(inf)) == ':') { 666 tp += 2; 667 c = *sp; 668 goto next_char; 669 } else { 670 (void) ungetc (*sp, inf); 671 --sp; 672 } 673 } 674 675 /* start: this code shifted left for cstyle */ 676 { 677 int f; 678 int pfline = lineno; 679 680 if (start_entry(&sp, token, &f)) { 681 (void) strncpy(tok, token, tp-token+1); 682 tok[tp-token+1] = 0; 683 getaline(tokftell); 684 pfnote(tok, pfline, f); 685 gotone = f; /* function */ 686 } 687 688 isoperator = optfound = midtoken = FALSE; 689 token = sp; 690 } 691 /* end: above code shifted left for cstyle */ 692 } else if (intoken(c)) 693 tp++; 694 } else if (begtoken(c)) { 695 token = tp = sp; 696 midtoken = TRUE; 697 tokftell = lineftell; 698 } 699 } 700 next_char: 701 if (c == ';' && tydef == end) /* clean with typedefs */ 702 tydef = none; 703 sp++; 704 /* The "c == }" was added to fix #1034126 */ 705 if (c == '\n' ||c == '}'|| sp > &line[sizeof (line) - BUFSIZ]) { 706 tp = token = sp = line; 707 number = gotone = midtoken = inquote = 708 inchar = isoperator = optfound = FALSE; 709 } 710 } 711 } 712 713 /* 714 * This routine checks to see if the current token is 715 * at the start of a function, or corresponds to a typedef 716 * It updates the input line * so that the '(' will be 717 * in it when it returns. 718 */ 719 static int 720 start_entry(lp, token, f) 721 char **lp, *token; 722 int *f; 723 { 724 char *sp; 725 int c; 726 static bool found; 727 bool firsttok; /* T if have seen first token in ()'s */ 728 int bad; 729 730 *f = 1; /* a function */ 731 sp = *lp; 732 c = *sp; 733 bad = FALSE; 734 if (!number) { /* space is not allowed in macro defs */ 735 while (iswhite(c)) { 736 *++sp = c = getc(inf); 737 if ((c == '\n') || (c == EOF)) { /* c==EOF, #1091005 */ 738 lineno++; 739 lineftell = ftell(inf); 740 if (sp > &line[sizeof (line) - BUFSIZ]) 741 goto ret; 742 } 743 } 744 /* the following tries to make it so that a #define a b(c) */ 745 /* doesn't count as a define of b. */ 746 } else { 747 if (strncmp(token, "define", 6) == 0) 748 found = 0; 749 else 750 found++; 751 if (found >= 2) { 752 gotone = TRUE; 753 badone: bad = TRUE; 754 goto ret; 755 } 756 } 757 /* check for the typedef cases */ 758 #ifdef XPG4 759 if (strncmp(token, "typedef", 7) == 0) { 760 #else /* !XPG4 */ 761 if (tflag && (strncmp(token, "typedef", 7) == 0)) { 762 #endif /* XPG4 */ 763 tydef = begin; 764 goto badone; 765 } 766 /* Handles 'class' besides 'struct' etc. */ 767 if (tydef == begin && ((strncmp(token, "struct", 6) == 0) || 768 (strncmp(token, "class", 5) == 0) || 769 (strncmp(token, "union", 5) == 0)|| 770 (strncmp(token, "enum", 4) == 0))) { 771 tydef = begin_rec; 772 goto badone; 773 } 774 if (tydef == begin) { 775 tydef = end; 776 goto badone; 777 } 778 if (tydef == begin_rec) { 779 tydef = begin_tag; 780 goto badone; 781 } 782 if (tydef == begin_tag) { 783 tydef = end; 784 goto gottydef; /* Fall through to "tydef==end" */ 785 } 786 787 gottydef: 788 if (tydef == end) { 789 *f = 0; 790 goto ret; 791 } 792 if (c != '(') 793 goto badone; 794 firsttok = FALSE; 795 while ((*++sp = c = getc(inf)) != ')') { 796 if ((c == '\n') || (c == EOF)) { /* c == EOF Fix for #1091005 */ 797 lineftell = ftell(inf); 798 lineno++; 799 if (sp > &line[sizeof (line) - BUFSIZ]) 800 goto ret; 801 } 802 /* 803 * This line used to confuse ctags: 804 * int (*oldhup)(); 805 * This fixes it. A nonwhite char before the first 806 * token, other than a / (in case of a comment in there) 807 * makes this not a declaration. 808 */ 809 if (begtoken(c) || c == '/') 810 firsttok = TRUE; 811 else if (!iswhite(c) && !firsttok) 812 goto badone; 813 } 814 while (iswhite(*++sp = c = getc(inf))) 815 if ((c == '\n') || (c == EOF)) { /* c == EOF fix for #1091005 */ 816 lineno++; 817 lineftell = ftell(inf); 818 if (sp > &line[sizeof (line) - BUFSIZ]) 819 break; 820 } 821 ret: 822 *lp = --sp; 823 if (c == '\n') 824 lineno--; 825 (void) ungetc(c, inf); 826 return (!bad && (!*f || isgood(c))); 827 /* hack for typedefs */ 828 } 829 830 /* 831 * Y_entries: 832 * Find the yacc tags and put them in. 833 */ 834 static void 835 Y_entries() 836 { 837 char *sp, *orig_sp; 838 int brace; 839 bool in_rule, toklen; 840 char tok[BUFSIZ]; 841 842 brace = 0; 843 getaline(lineftell); 844 pfnote("yyparse", lineno, TRUE); 845 while (fgets(line, sizeof (line), inf) != NULL) 846 for (sp = line; *sp; sp++) 847 switch (*sp) { 848 case '\n': 849 lineno++; 850 /* FALLTHROUGH */ 851 case ' ': 852 case '\t': 853 case '\f': 854 case '\r': 855 break; 856 case '"': 857 do { 858 while (*++sp != '"') 859 continue; 860 } while (sp[-1] == '\\'); 861 break; 862 case '\'': 863 do { 864 while (*++sp != '\'') 865 continue; 866 } while (sp[-1] == '\\'); 867 break; 868 case '/': 869 if (*++sp == '*') 870 sp = toss_comment(sp); 871 else 872 --sp; 873 break; 874 case '{': 875 brace++; 876 break; 877 case '}': 878 brace--; 879 break; 880 case '%': 881 if (sp[1] == '%' && sp == line) 882 return; 883 break; 884 case '|': 885 case ';': 886 in_rule = FALSE; 887 break; 888 default: 889 if (brace == 0 && !in_rule && (isalpha(*sp) || 890 *sp == '.' || 891 *sp == '_')) { 892 orig_sp = sp; 893 ++sp; 894 while (isalnum(*sp) || *sp == '_' || 895 *sp == '.') 896 sp++; 897 toklen = sp - orig_sp; 898 while (isspace(*sp)) 899 sp++; 900 if (*sp == ':' || (*sp == '\0' && 901 first_char() == ':')) { 902 (void) strncpy(tok, 903 orig_sp, toklen); 904 tok[toklen] = '\0'; 905 (void) strcpy(lbuf, line); 906 lbuf[strlen(lbuf) - 1] = '\0'; 907 pfnote(tok, lineno, TRUE); 908 in_rule = TRUE; 909 } 910 else 911 sp--; 912 } 913 break; 914 } 915 } 916 917 static char * 918 toss_comment(start) 919 char *start; 920 { 921 char *sp; 922 923 /* 924 * first, see if the end-of-comment is on the same line 925 */ 926 do { 927 #ifdef __STDC__ 928 while ((sp = strchr(start, '*')) != NULL) 929 #else 930 while ((sp = index(start, '*')) != NULL) 931 #endif 932 if (sp[1] == '/') 933 return (++sp); 934 else 935 start = (++sp); 936 start = line; 937 lineno++; 938 } while (fgets(line, sizeof (line), inf) != NULL); 939 940 /* 941 * running this through lint revealed that the original version 942 * of this routine didn't explicitly return something; while 943 * the return value was always used!. so i've added this 944 * next line. 945 */ 946 return (sp); 947 } 948 949 static void 950 getaline(where) 951 long int where; 952 { 953 long saveftell = ftell(inf); 954 char *cp; 955 956 (void) fseek(inf, where, 0); 957 (void) fgets(lbuf, sizeof (lbuf), inf); 958 #ifdef __STDC__ 959 cp = strrchr(lbuf, '\n'); 960 #else 961 cp = rindex(lbuf, '\n'); 962 #endif 963 if (cp) 964 *cp = 0; 965 (void) fseek(inf, saveftell, 0); 966 } 967 968 static void 969 free_tree(node) 970 NODE *node; 971 { 972 while (node) { 973 free_tree(node->right); 974 free(node); 975 node = node->left; 976 } 977 } 978 979 static void 980 add_node(node, cur_node) 981 NODE *node, *cur_node; 982 { 983 int dif; 984 985 dif = strcmp(node->entry, cur_node->entry); 986 if (dif == 0) { 987 if (node->file == cur_node->file) { 988 if (!wflag) { 989 (void) fprintf(stderr, 990 gettext("Duplicate entry in file %s, line %d: %s\n"), 991 node->file, lineno, node->entry); 992 (void) fprintf(stderr, 993 gettext("Second entry ignored\n")); 994 } 995 return; 996 } 997 if (!cur_node->been_warned) 998 if (!wflag) { 999 (void) fprintf(stderr, gettext("Duplicate " 1000 "entry in files %s and %s: %s " 1001 "(Warning only)\n"), 1002 node->file, cur_node->file, 1003 node->entry); 1004 } 1005 cur_node->been_warned = TRUE; 1006 return; 1007 } 1008 1009 if (dif < 0) { 1010 if (cur_node->left != NULL) 1011 add_node(node, cur_node->left); 1012 else 1013 cur_node->left = node; 1014 return; 1015 } 1016 if (cur_node->right != NULL) 1017 add_node(node, cur_node->right); 1018 else 1019 cur_node->right = node; 1020 } 1021 1022 static void 1023 put_entries(node) 1024 NODE *node; 1025 { 1026 char *sp; 1027 1028 if (node == NULL) 1029 return; 1030 put_entries(node->left); 1031 1032 /* 1033 * while the code in the following #ifdef section could be combined, 1034 * it's explicitly separated here to make maintainance easier. 1035 */ 1036 #ifdef XPG4 1037 /* 1038 * POSIX 2003: we no longer have a "-t" flag; the logic is 1039 * automatically assumed to be "turned on" here. 1040 */ 1041 if (xflag == 0) { 1042 (void) fprintf(outf, "%s\t%s\t%c^", 1043 node->entry, node->file, searchar); 1044 for (sp = node->pat; *sp; sp++) 1045 if (*sp == '\\') 1046 (void) fprintf(outf, "\\\\"); 1047 else if (*sp == searchar) 1048 (void) fprintf(outf, "\\%c", searchar); 1049 else 1050 (void) putc(*sp, outf); 1051 (void) fprintf(outf, "%c\n", searchar); 1052 } else if (vflag) 1053 (void) fprintf(stdout, "%s %s %d\n", 1054 node->entry, node->file, (node->lno+63)/64); 1055 else 1056 (void) fprintf(stdout, "%-16s %4d %-16s %s\n", 1057 node->entry, node->lno, node->file, node->pat); 1058 #else /* XPG4 */ 1059 /* 1060 * original way of doing things. "-t" logic is only turned on 1061 * when the user has specified it via a command-line argument. 1062 */ 1063 if (xflag == 0) 1064 if (node->f) { /* a function */ 1065 (void) fprintf(outf, "%s\t%s\t%c^", 1066 node->entry, node->file, searchar); 1067 for (sp = node->pat; *sp; sp++) 1068 if (*sp == '\\') 1069 (void) fprintf(outf, "\\\\"); 1070 else if (*sp == searchar) 1071 (void) fprintf(outf, "\\%c", searchar); 1072 else 1073 (void) putc(*sp, outf); 1074 (void) fprintf(outf, "%c\n", searchar); 1075 } else { /* a typedef; text pattern inadequate */ 1076 (void) fprintf(outf, "%s\t%s\t%d\n", 1077 node->entry, node->file, node->lno); 1078 } else if (vflag) 1079 (void) fprintf(stdout, "%s %s %d\n", 1080 node->entry, node->file, (node->lno+63)/64); 1081 else 1082 (void) fprintf(stdout, "%-16s %4d %-16s %s\n", 1083 node->entry, node->lno, node->file, node->pat); 1084 #endif /* XPG4 */ 1085 put_entries(node->right); 1086 } 1087 1088 1089 static int 1090 PF_funcs(fi) 1091 FILE *fi; 1092 { 1093 1094 pfcnt = 0; 1095 while (fgets(lbuf, sizeof (lbuf), fi)) { 1096 lineno++; 1097 dbp = lbuf; 1098 if (*dbp == '%') dbp++; /* Ratfor escape to fortran */ 1099 while (isspace(*dbp)) 1100 dbp++; 1101 if (*dbp == 0) 1102 continue; 1103 switch (*dbp |' ') { 1104 1105 case 'i': 1106 if (tail("integer")) 1107 takeprec(); 1108 break; 1109 case 'r': 1110 if (tail("real")) 1111 takeprec(); 1112 break; 1113 case 'l': 1114 if (tail("logical")) 1115 takeprec(); 1116 break; 1117 case 'c': 1118 if (tail("complex") || tail("character")) 1119 takeprec(); 1120 break; 1121 case 'd': 1122 if (tail("double")) { 1123 while (isspace(*dbp)) 1124 dbp++; 1125 if (*dbp == 0) 1126 continue; 1127 if (tail("precision")) 1128 break; 1129 continue; 1130 } 1131 break; 1132 } 1133 while (isspace(*dbp)) 1134 dbp++; 1135 if (*dbp == 0) 1136 continue; 1137 switch (*dbp|' ') { 1138 1139 case 'f': 1140 if (tail("function")) 1141 getit(); 1142 continue; 1143 case 's': 1144 if (tail("subroutine")) 1145 getit(); 1146 continue; 1147 case 'p': 1148 if (tail("program")) { 1149 getit(); 1150 continue; 1151 } 1152 if (tail("procedure")) 1153 getit(); 1154 continue; 1155 } 1156 } 1157 return (pfcnt); 1158 } 1159 1160 static int 1161 tail(cp) 1162 char *cp; 1163 { 1164 int len = 0; 1165 1166 while (*cp && (*cp&~' ') == ((*(dbp+len))&~' ')) 1167 cp++, len++; 1168 if (*cp == 0) { 1169 dbp += len; 1170 return (1); 1171 } 1172 return (0); 1173 } 1174 1175 static void 1176 takeprec() 1177 { 1178 1179 while (isspace(*dbp)) 1180 dbp++; 1181 if (*dbp != '*') 1182 return; 1183 dbp++; 1184 while (isspace(*dbp)) 1185 dbp++; 1186 if (!isdigit(*dbp)) { 1187 --dbp; /* force failure */ 1188 return; 1189 } 1190 do 1191 dbp++; 1192 while (isdigit(*dbp)); 1193 } 1194 1195 static void 1196 getit() 1197 { 1198 char *cp; 1199 char c; 1200 char nambuf[BUFSIZ]; 1201 1202 for (cp = lbuf; *cp; cp++) 1203 ; 1204 *--cp = 0; /* zap newline */ 1205 while (isspace(*dbp)) 1206 dbp++; 1207 if (*dbp == 0 || !isalpha(*dbp) || !isascii(*dbp)) 1208 return; 1209 for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++) 1210 continue; 1211 c = cp[0]; 1212 cp[0] = 0; 1213 (void) strcpy(nambuf, dbp); 1214 cp[0] = c; 1215 pfnote(nambuf, lineno, TRUE); 1216 pfcnt++; 1217 } 1218 1219 static char * 1220 savestr(cp) 1221 char *cp; 1222 { 1223 int len; 1224 char *dp; 1225 1226 len = strlen(cp); 1227 dp = (char *)malloc(len+1); 1228 (void) strcpy(dp, cp); 1229 1230 return (dp); 1231 } 1232 1233 #ifndef __STDC__ 1234 /* 1235 * Return the ptr in sp at which the character c last 1236 * appears; NULL if not found 1237 * 1238 * Identical to v7 rindex, included for portability. 1239 */ 1240 1241 static char * 1242 rindex(sp, c) 1243 char *sp, c; 1244 { 1245 char *r; 1246 1247 r = NULL; 1248 do { 1249 if (*sp == c) 1250 r = sp; 1251 } while (*sp++); 1252 return (r); 1253 } 1254 #endif 1255 1256 /* 1257 * lisp tag functions 1258 * just look for (def or (DEF 1259 */ 1260 1261 static void 1262 L_funcs(fi) 1263 FILE *fi; 1264 { 1265 int special; 1266 1267 pfcnt = 0; 1268 while (fgets(lbuf, sizeof (lbuf), fi)) { 1269 lineno++; 1270 dbp = lbuf; 1271 if (dbp[0] == '(' && 1272 (dbp[1] == 'D' || dbp[1] == 'd') && 1273 (dbp[2] == 'E' || dbp[2] == 'e') && 1274 (dbp[3] == 'F' || dbp[3] == 'f')) { 1275 dbp += 4; 1276 if (striccmp(dbp, "method") == 0 || 1277 striccmp(dbp, "wrapper") == 0 || 1278 striccmp(dbp, "whopper") == 0) 1279 special = TRUE; 1280 else 1281 special = FALSE; 1282 while (!isspace(*dbp)) 1283 dbp++; 1284 while (isspace(*dbp)) 1285 dbp++; 1286 L_getit(special); 1287 } 1288 } 1289 } 1290 1291 static void 1292 L_getit(special) 1293 int special; 1294 { 1295 char *cp; 1296 char c; 1297 char nambuf[BUFSIZ]; 1298 1299 for (cp = lbuf; *cp; cp++) 1300 continue; 1301 *--cp = 0; /* zap newline */ 1302 if (*dbp == 0) 1303 return; 1304 if (special) { 1305 #ifdef __STDC__ 1306 if ((cp = strchr(dbp, ')')) == NULL) 1307 #else 1308 if ((cp = index(dbp, ')')) == NULL) 1309 #endif 1310 return; 1311 while (cp >= dbp && *cp != ':') 1312 cp--; 1313 if (cp < dbp) 1314 return; 1315 dbp = cp; 1316 while (*cp && *cp != ')' && *cp != ' ') 1317 cp++; 1318 } 1319 else 1320 for (cp = dbp + 1; *cp && *cp != '(' && *cp != ' '; cp++) 1321 continue; 1322 c = cp[0]; 1323 cp[0] = 0; 1324 (void) strcpy(nambuf, dbp); 1325 cp[0] = c; 1326 pfnote(nambuf, lineno, TRUE); 1327 pfcnt++; 1328 } 1329 1330 /* 1331 * striccmp: 1332 * Compare two strings over the length of the second, ignoring 1333 * case distinctions. If they are the same, return 0. If they 1334 * are different, return the difference of the first two different 1335 * characters. It is assumed that the pattern (second string) is 1336 * completely lower case. 1337 */ 1338 static int 1339 striccmp(str, pat) 1340 char *str, *pat; 1341 { 1342 int c1; 1343 1344 while (*pat) { 1345 if (isupper(*str)) 1346 c1 = tolower(*str); 1347 else 1348 c1 = *str; 1349 if (c1 != *pat) 1350 return (c1 - *pat); 1351 pat++; 1352 str++; 1353 } 1354 return (0); 1355 } 1356 1357 /* 1358 * first_char: 1359 * Return the first non-blank character in the file. After 1360 * finding it, rewind the input file so we start at the beginning 1361 * again. 1362 */ 1363 static int 1364 first_char() 1365 { 1366 int c; 1367 long off; 1368 1369 off = ftell(inf); 1370 while ((c = getc(inf)) != EOF) 1371 if (!isspace(c) && c != '\r') { 1372 (void) fseek(inf, off, 0); 1373 return (c); 1374 } 1375 (void) fseek(inf, off, 0); 1376 return (EOF); 1377 } 1378 1379 /* 1380 * toss_yysec: 1381 * Toss away code until the next "%%" line. 1382 */ 1383 static void 1384 toss_yysec() 1385 { 1386 char buf[BUFSIZ]; 1387 1388 for (;;) { 1389 lineftell = ftell(inf); 1390 if (fgets(buf, BUFSIZ, inf) == NULL) 1391 return; 1392 lineno++; 1393 if (strncmp(buf, "%%", 2) == 0) 1394 return; 1395 } 1396 } 1397 1398 static void 1399 Usage() 1400 { 1401 #ifdef XPG4 1402 (void) fprintf(stderr, gettext("Usage:\tctags [-aBFuvw] " 1403 #else /* !XPG4 */ 1404 (void) fprintf(stderr, gettext("Usage:\tctags [-aBFtuvw] " 1405 #endif /* XPG4 */ 1406 "[-f tagsfile] file ...\n")); 1407 (void) fprintf(stderr, gettext("OR:\tctags [-x] file ...\n")); 1408 exit(1); 1409 } 1410 1411 1412 /* 1413 * parseargs(): modify the args 1414 * the purpose of this routine is to transform any ancient argument 1415 * usage into a format which is acceptable to getopt(3C), so that we 1416 * retain backwards Solaris 2.[0-4] compatibility. 1417 * 1418 * This routine allows us to make full use of getopts, without any 1419 * funny argument processing in main(). 1420 * 1421 * The other alternative would be to hand-craft the processed arguments 1422 * during and after getopt(3C) - which usually leads to uglier code 1423 * in main(). I've opted to keep the ugliness isolated down here, 1424 * instead of in main(). 1425 * 1426 * In a nutshell, if the user has used the old Solaris syntax of: 1427 * ctags [-aBFtuvwx] [-f tagsfile] filename ... 1428 * We simply change this into: 1429 * ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file... 1430 * 1431 * If the user has specified the new getopt(3C) syntax, we merely 1432 * copy that into our modified argument space. 1433 */ 1434 static void 1435 parseargs(ac, av) 1436 int ac; /* argument count */ 1437 char **av; /* ptr to original argument space */ 1438 { 1439 int i; /* current argument */ 1440 int a; /* used to parse combined arguments */ 1441 int fflag; /* 1 = we're only parsing filenames */ 1442 size_t sz; /* size of the argument */ 1443 size_t mav_sz; /* size of our psuedo argument space */ 1444 1445 i = mac = fflag = 0; /* proper initializations */ 1446 1447 mav_sz = ((ac + 1) * sizeof (char *)); 1448 if ((mav = malloc(mav_sz)) == (char **)NULL) { 1449 perror("Can't malloc argument space"); 1450 exit(1); 1451 } 1452 1453 /* for each argument, see if we need to change things: */ 1454 for (; (av[i] != NULL) && (av[i][0] != '\0'); i++) { 1455 1456 if (strcmp(av[i], "--") == 0) { 1457 fflag = 1; /* just handle filenames now */ 1458 } 1459 1460 sz = strlen(&av[i][0]); /* get this arg's size */ 1461 1462 /* 1463 * if the argument starts with a "-", and has more than 1464 * 1 flag, then we have to search through each character, 1465 * and separate any flags which have been combined. 1466 * 1467 * so, if we've found a "-" string which needs separating: 1468 */ 1469 if (fflag == 0 && /* not handling filename args */ 1470 av[i][0] == '-' && /* and this is a flag */ 1471 sz > 2) { /* and there's more than 1 flag */ 1472 /* then for each flag after the "-" sign: */ 1473 for (a = 1; av[i][a]; a++) { 1474 /* copy the flag into mav space. */ 1475 if (a > 1) { 1476 /* 1477 * we need to call realloc() after the 1478 * 1st combined flag, because "ac" 1479 * doesn't include combined args. 1480 */ 1481 mav_sz += sizeof (char *); 1482 if ((mav = realloc(mav, mav_sz)) == 1483 (char **)NULL) { 1484 perror("Can't realloc " 1485 "argument space"); 1486 exit(1); 1487 } 1488 } 1489 1490 if ((mav[mac] = malloc((size_t)CPFLAG)) == 1491 (char *)NULL) { 1492 perror("Can't malloc argument space"); 1493 exit(1); 1494 } 1495 (void) sprintf(mav[mac], "-%c", av[i][a]); 1496 ++mac; 1497 } 1498 } else { 1499 /* otherwise, just copy the argument: */ 1500 if ((mav[mac] = malloc(sz + 1)) == (char *)NULL) { 1501 perror("Can't malloc argument space"); 1502 exit(1); 1503 } 1504 (void) strcpy(mav[mac], av[i]); 1505 ++mac; 1506 } 1507 } 1508 1509 mav[mac] = (char *)NULL; 1510 } 1511