1 /* 2 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 /* Copyright (c) 1988 AT&T */ 7 /* All Rights Reserved */ 8 9 /* 10 * Copyright (c) 1980 Regents of the University of California. 11 * All rights reserved. The Berkeley software License Agreement 12 * specifies the terms and conditions for redistribution. 13 */ 14 15 /* 16 * Modify ctags to handle C++ in C_entries(), etc: 17 * - Handles C++ comment token "//" 18 * - Handles C++ scope operator "::". 19 * This helps to distinguish between xyz() 20 * definition and X::xyz() definition. 21 * - Recognizes C++ reserved word "class" in typedef processing 22 * (for "-t" option) 23 * - Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx. 24 * - Handles overloaded unary/binary operator names 25 * Doesn't handle yet: 26 * - inline functions in class definition (currently they get 27 * swallowed within a class definition) 28 * - Tags with scope operator :: with spaces in between, 29 * e.g. classz ::afunc 30 * 31 * Enhance operator functions support: 32 * - Control flow involving operator tokens scanning are 33 * consistent with that of other function tokens - original 34 * hacking method for 2.0 is removed. This will accurately 35 * identify tags for declarations of the form 'operator+()' 36 * (bugid 1027806) as well as allowing spaces in between 37 * 'operator' and 'oprtk', e.g. 'operator + ()'. 38 * 39 */ 40 41 #ifndef lint 42 char copyright[] = "@(#) Copyright (c) 1980 Regents of the University of " 43 "California.\nAll rights reserved.\n"; 44 #endif 45 46 #pragma ident "%Z%%M% %I% %E% SMI" 47 /* from UCB 5.1 5/31/85 */ 48 49 #include <stdio.h> 50 #include <ctype.h> 51 #include <locale.h> 52 #include <unistd.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <limits.h> 56 #include <sys/types.h> 57 #include <sys/stat.h> 58 59 /* 60 * ctags: create a tags file 61 */ 62 63 #define bool char 64 65 #define TRUE (1) 66 #define FALSE (0) 67 68 #define CPFLAG 3 /* # of bytes in a flag */ 69 70 #define iswhite(arg) (_wht[arg]) /* T if char is white */ 71 #define begtoken(arg) (_btk[arg]) /* T if char can start token */ 72 #define intoken(arg) (_itk[arg]) /* T if char can be in token */ 73 #define endtoken(arg) (_etk[arg]) /* T if char ends tokens */ 74 #define isgood(arg) (_gd[arg]) /* T if char can be after ')' */ 75 76 #define optoken(arg) (_opr[arg]) /* T if char can be */ 77 /* an overloaded operator token */ 78 79 #define max(I1, I2) (I1 > I2 ? I1 : I2) 80 81 struct nd_st { /* sorting structure */ 82 char *entry; /* function or type name */ 83 char *file; /* file name */ 84 bool f; /* use pattern or line no */ 85 int lno; /* for -x option */ 86 char *pat; /* search pattern */ 87 bool been_warned; /* set if noticed dup */ 88 struct nd_st *left, *right; /* left and right sons */ 89 }; 90 91 long ftell(); 92 typedef struct nd_st NODE; 93 94 static bool 95 number, /* T if on line starting with # */ 96 gotone, /* found a func already on line */ 97 /* boolean "func" (see init) */ 98 _wht[0177], _etk[0177], _itk[0177], _btk[0177], _gd[0177]; 99 100 /* boolean array for overloadable operator symbols */ 101 static bool _opr[0177]; 102 103 /* 104 * typedefs are recognized using a simple finite automata, 105 * tydef is its state variable. 106 */ 107 typedef enum {none, begin, begin_rec, begin_tag, middle, end } TYST; 108 109 static TYST tydef = none; 110 111 static char searchar = '/'; /* use /.../ searches */ 112 113 static int lineno; /* line number of current line */ 114 static char 115 line[4*BUFSIZ], /* current input line */ 116 *curfile, /* current input file name */ 117 *outfile = "tags", /* output file */ 118 *white = " \f\t\n", /* white chars */ 119 *endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?", 120 /* token ending chars */ 121 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz", 122 /* token starting chars */ 123 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" 124 "0123456789", 125 /* valid in-token chars */ 126 *notgd = ",;"; /* non-valid after-function chars */ 127 128 static char *oprtk = " =-+%*/&|^~!<>[]()"; /* overloadable operators */ 129 130 static int file_num; /* current file number */ 131 static int aflag; /* -a: append to tags */ 132 133 #ifndef XPG4 /* XPG4: handle typedefs by default */ 134 static int tflag; /* -t: create tags for typedefs */ 135 #endif /* !XPG4 */ 136 137 static int uflag; /* -u: update tags */ 138 static int wflag; /* -w: suppress warnings */ 139 static int vflag; /* -v: create vgrind style index output */ 140 static int xflag; /* -x: create cxref style output */ 141 142 static char lbuf[LINE_MAX]; 143 144 static FILE 145 *inf, /* ioptr for current input file */ 146 *outf; /* ioptr for tags file */ 147 148 static long lineftell; /* ftell after getc( inf ) == '\n' */ 149 150 static NODE *head; /* the head of the sorted binary tree */ 151 152 #ifdef __STDC__ 153 char *strrchr(), *strchr(); 154 #else 155 char *rindex(), *index(); 156 #endif 157 158 static int infile_fail; /* Count of bad opens. Fix bug ID #1082298 */ 159 160 static char *dbp = lbuf; 161 static int pfcnt; 162 163 static int mac; /* our modified argc, after parseargs() */ 164 static char **mav; /* our modified argv, after parseargs() */ 165 166 167 /* our local functions: */ 168 static void init(); 169 static void find_entries(char *file); 170 static void pfnote(); 171 static void C_entries(); 172 static int start_entry(char **lp, char *token, int *f); 173 static void Y_entries(); 174 static char *toss_comment(char *start); 175 static void getline(long int where); 176 static void free_tree(NODE *node); 177 static void add_node(NODE *node, NODE *cur_node); 178 static void put_entries(NODE *node); 179 static int PF_funcs(FILE *fi); 180 static int tail(char *cp); 181 static void takeprec(); 182 static void getit(); 183 static char *savestr(char *cp); 184 static void L_funcs(FILE *fi); 185 static void L_getit(int special); 186 static int striccmp(char *str, char *pat); 187 static int first_char(); 188 static void toss_yysec(); 189 static void Usage(); 190 static void parseargs(int ac, char **av); 191 192 int 193 main(int ac, char *av[]) 194 { 195 int i; 196 char cmd[100]; 197 198 (void) setlocale(LC_ALL, ""); 199 #if !defined(TEXT_DOMAIN) 200 #define TEXT_DOMAIN "SYS_TEST" 201 #endif 202 (void) textdomain(TEXT_DOMAIN); 203 204 parseargs(ac, av); 205 206 while ((i = getopt(mac, mav, "aBFtuvwxf:")) != EOF) { 207 switch (i) { 208 case 'a': /* -a: Append output to existing tags file */ 209 aflag++; 210 break; 211 212 case 'B': /* -B: Use backward search patterns (?...?) */ 213 searchar = '?'; 214 break; 215 216 case 'F': /* -F: Use forward search patterns (/.../) */ 217 searchar = '/'; 218 break; 219 220 case 't': /* -t: Create tags for typedefs. */ 221 /* for XPG4 , we silently ignore "-t". */ 222 #ifndef XPG4 223 tflag++; 224 #endif /* !XPG4 */ 225 break; 226 227 case 'u': /* -u: Update the specified tags file */ 228 uflag++; 229 break; 230 231 case 'v': /* -v: Index listing on stdout */ 232 vflag++; 233 xflag++; 234 break; 235 236 case 'w': /* -w: Suppress warnings */ 237 wflag++; 238 break; 239 240 case 'x': /* -x: Produce a simple index */ 241 xflag++; 242 break; 243 244 case 'f': /* -f tagsfile: output to tagsfile */ 245 outfile = strdup(optarg); 246 break; 247 248 default: 249 Usage(); /* never returns */ 250 break; 251 } 252 } 253 254 /* if we didn't specify any source code to parse, complain and die. */ 255 if (optind == mac) { 256 Usage(); /* never returns */ 257 } 258 259 260 init(); /* set up boolean "functions" */ 261 /* 262 * loop through files finding functions 263 */ 264 for (file_num = optind; file_num < mac; file_num++) 265 find_entries(mav[file_num]); 266 267 if (xflag) { 268 put_entries(head); 269 exit(infile_fail > 0 ? 2 : 0); /* Fix for 1082298 */ 270 } 271 if (uflag) { 272 for (i = 1; i < mac; i++) { 273 (void) sprintf(cmd, 274 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", 275 outfile, mav[i], outfile); 276 (void) system(cmd); 277 } 278 aflag++; 279 } 280 outf = fopen(outfile, aflag ? "a" : "w"); 281 if (outf == NULL) { 282 perror(outfile); 283 exit(1); 284 } 285 put_entries(head); 286 (void) fclose(outf); 287 if (uflag) { 288 (void) sprintf(cmd, "sort %s -o %s", outfile, outfile); 289 (void) system(cmd); 290 } 291 return (infile_fail > 0 ? 2 : 0); /* Fix for #1082298 */ 292 } 293 294 /* 295 * This routine sets up the boolean psuedo-functions which work 296 * by seting boolean flags dependent upon the corresponding character 297 * Every char which is NOT in that string is not a white char. Therefore, 298 * all of the array "_wht" is set to FALSE, and then the elements 299 * subscripted by the chars in "white" are set to TRUE. Thus "_wht" 300 * of a char is TRUE if it is the string "white", else FALSE. 301 */ 302 static void 303 init() 304 { 305 char *sp; 306 int i; 307 308 for (i = 0; i < 0177; i++) { 309 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE; 310 _opr[i] = FALSE; /* initialize boolean */ 311 /* array of operator symbols */ 312 _gd[i] = TRUE; 313 } 314 for (sp = white; *sp; sp++) 315 _wht[*sp] = TRUE; 316 for (sp = endtk; *sp; sp++) 317 _etk[*sp] = TRUE; 318 for (sp = intk; *sp; sp++) 319 _itk[*sp] = TRUE; 320 for (sp = begtk; *sp; sp++) 321 _btk[*sp] = TRUE; 322 323 /* mark overloadable operator symbols */ 324 for (sp = oprtk; *sp; sp++) 325 _opr[*sp] = TRUE; 326 327 for (sp = notgd; *sp; sp++) 328 _gd[*sp] = FALSE; 329 } 330 331 /* 332 * This routine opens the specified file and calls the function 333 * which finds the function and type definitions. 334 */ 335 static void 336 find_entries(file) 337 char *file; 338 { 339 char *cp; 340 struct stat st; 341 342 /* skip anything that isn't a regular file */ 343 if (stat(file, &st) == 0 && !S_ISREG(st.st_mode)) 344 return; 345 346 if ((inf = fopen(file, "r")) == NULL) { 347 perror(file); 348 infile_fail++; /* Count bad opens. ID #1082298 */ 349 return; 350 } 351 curfile = savestr(file); 352 lineno = 0; 353 #ifdef __STDC__ 354 cp = strrchr(file, '.'); 355 #else 356 cp = rindex(file, '.'); 357 #endif 358 /* .l implies lisp or lex source code */ 359 if (cp && cp[1] == 'l' && cp[2] == '\0') { 360 #ifdef __STDC__ 361 if (strchr(";([", first_char()) != NULL) /* lisp */ 362 #else 363 if (index(";([", first_char()) != NULL) /* lisp */ 364 #endif 365 { 366 L_funcs(inf); 367 (void) fclose(inf); 368 return; 369 } else { /* lex */ 370 /* 371 * throw away all the code before the second "%%" 372 */ 373 toss_yysec(); 374 getline(lineftell); 375 pfnote("yylex", lineno, TRUE); 376 toss_yysec(); 377 C_entries(); 378 (void) fclose(inf); 379 return; 380 } 381 } 382 /* .y implies a yacc file */ 383 if (cp && cp[1] == 'y' && cp[2] == '\0') { 384 toss_yysec(); 385 Y_entries(); 386 C_entries(); 387 (void) fclose(inf); 388 return; 389 } 390 391 /* 392 * Add in file name extension support for Sun C++ which 393 * permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.) 394 */ 395 396 /* if not a .c, .C, .cc, .cxx or .h file, try fortran */ 397 if (cp && (cp[1] != 'C' && cp[1] != 'c' && cp[1] != 'h') && 398 cp[2] == '\0' && (strcmp(cp, ".cc") == 0) && 399 (strcmp(cp, ".cxx") == 0)) { 400 if (PF_funcs(inf) != 0) { 401 (void) fclose(inf); 402 return; 403 } 404 rewind(inf); /* no fortran tags found, try C */ 405 } 406 C_entries(); 407 (void) fclose(inf); 408 } 409 410 static void 411 pfnote(name, ln, f) 412 char *name; 413 int ln; 414 bool f; /* f == TRUE when function */ 415 { 416 char *fp; 417 NODE *np; 418 char *nametk; /* hold temporary tokens from name */ 419 char nbuf[BUFSIZ]; 420 421 if ((np = malloc(sizeof (NODE))) == NULL) { 422 (void) fprintf(stderr, 423 gettext("ctags: too many entries to sort\n")); 424 put_entries(head); 425 free_tree(head); 426 head = np = (NODE *) malloc(sizeof (NODE)); 427 } 428 if (xflag == 0 && (strcmp(name, "main") == 0)) { 429 #ifdef __STDC__ 430 fp = strrchr(curfile, '/'); 431 #else 432 fp = rindex(curfile, '/'); 433 #endif 434 if (fp == 0) 435 fp = curfile; 436 else 437 fp++; 438 (void) sprintf(nbuf, "M%s", fp); 439 #ifdef __STDC__ 440 fp = strrchr(nbuf, '.'); 441 #else 442 fp = rindex(nbuf, '.'); 443 #endif 444 /* Chop off .cc and .cxx as well as .c, .h, etc */ 445 if (fp && ((fp[2] == 0) || (fp[2] == 'c' && fp[3] == 0) || 446 (fp[3] == 'x' && fp[4] == 0))) 447 *fp = 0; 448 name = nbuf; 449 } 450 451 /* remove in-between blanks operator function tags */ 452 #ifdef __STDC__ 453 if (strchr(name, ' ') != NULL) 454 #else 455 if (index(name, ' ') != NULL) 456 #endif 457 { 458 (void) strcpy(name, strtok(name, " ")); 459 while (nametk = strtok(0, " ")) 460 (void) strcat(name, nametk); 461 } 462 np->entry = savestr(name); 463 np->file = curfile; 464 np->f = f; 465 np->lno = ln; 466 np->left = np->right = 0; 467 if (xflag == 0) { 468 lbuf[50] = 0; 469 (void) strcat(lbuf, "$"); 470 lbuf[50] = 0; 471 } 472 np->pat = savestr(lbuf); 473 if (head == NULL) 474 head = np; 475 else 476 add_node(np, head); 477 } 478 479 /* 480 * This routine finds functions and typedefs in C syntax and adds them 481 * to the list. 482 */ 483 static void 484 C_entries() 485 { 486 int c; 487 char *token, *tp; 488 bool incomm, inquote, inchar, midtoken, isoperator, optfound; 489 int level; 490 char *sp; 491 char tok[BUFSIZ]; 492 long int tokftell; 493 494 number = gotone = midtoken = inquote = inchar = 495 incomm = isoperator = optfound = FALSE; 496 497 level = 0; 498 sp = tp = token = line; 499 lineno++; 500 lineftell = tokftell = ftell(inf); 501 for (;;) { 502 *sp = c = getc(inf); 503 if (feof(inf)) 504 break; 505 if (c == '\n') { 506 lineftell = ftell(inf); 507 lineno++; 508 } else if (c == '\\') { 509 c = *++sp = getc(inf); 510 if ((c == '\n') || (c == EOF)) { /* c == EOF, 1091005 */ 511 lineftell = ftell(inf); 512 lineno++; 513 c = ' '; 514 } 515 } else if (incomm) { 516 if (c == '*') { 517 while ((*++sp = c = getc(inf)) == '*') 518 continue; 519 520 /* c == EOF 1091005 */ 521 if ((c == '\n') || (c == EOF)) { 522 lineftell = ftell(inf); 523 lineno++; 524 } 525 526 if (c == '/') 527 incomm = FALSE; 528 } 529 } else if (inquote) { 530 /* 531 * Too dumb to know about \" not being magic, but 532 * they usually occur in pairs anyway. 533 */ 534 if (c == '"') 535 inquote = FALSE; 536 continue; 537 } else if (inchar) { 538 if (c == '\'') 539 inchar = FALSE; 540 continue; 541 } else if (midtoken == TRUE) { /* if white space omitted */ 542 goto dotoken; 543 } else switch (c) { 544 case '"': 545 inquote = TRUE; 546 continue; 547 case '\'': 548 inchar = TRUE; 549 continue; 550 case '/': 551 *++sp = c = getc(inf); 552 /* Handles the C++ comment token "//" */ 553 if (c == '*') 554 incomm = TRUE; 555 else if (c == '/') { 556 /* 557 * Skip over all the characters after 558 * "//" until a newline character. Now also 559 * includes fix for 1091005, check for EOF. 560 */ 561 do { 562 c = getc(inf); 563 /* 1091005: */ 564 } while ((c != '\n') && (c != EOF)); 565 566 567 /* 568 * Fixed bugid 1030014 569 * Return the current position of the 570 * file after the newline. 571 */ 572 lineftell = ftell(inf); 573 lineno++; 574 *--sp = c; 575 } 576 else 577 (void) ungetc(*sp, inf); 578 continue; 579 case '#': 580 if (sp == line) 581 number = TRUE; 582 continue; 583 case '{': 584 if ((tydef == begin_rec) || (tydef == begin_tag)) { 585 tydef = middle; 586 } 587 level++; 588 continue; 589 case '}': 590 /* 591 * Heuristic for function or structure end; 592 * common for #ifdef/#else blocks to add extra "{" 593 */ 594 if (sp == line) 595 level = 0; /* reset */ 596 else 597 level--; 598 if (!level && tydef == middle) { 599 tydef = end; 600 } 601 if (!level && tydef == none) /* Fix for #1034126 */ 602 goto dotoken; 603 continue; 604 } 605 606 dotoken: 607 608 609 if (!level && !inquote && !incomm && gotone == FALSE) { 610 if (midtoken) { 611 if (endtoken(c)) { 612 613 /* 614 * 615 * ':' +---> ':' -> midtok 616 * 617 * +---> operator{+,-, etc} -> midtok 618 * (continue) 619 * +---> endtok 620 */ 621 /* 622 * Enhance operator function support and 623 * fix bugid 1027806 624 * 625 * For operator token, scanning will continue until 626 * '(' is found. Spaces between 'operater' and 627 * 'oprtk' are allowed (e.g. 'operator + ()'), but 628 * will be removed when the actual entry for the tag 629 * is made. 630 * Note that functions of the form 'operator ()(int)' 631 * will be recognized, but 'operator ()' will not, 632 * even though this is legitimate in C. 633 */ 634 635 if (optoken(c)) { 636 if (isoperator) { 637 if (optfound) { 638 if (c != '(') { 639 tp++; 640 goto next_char; 641 } 642 } else { 643 if (c != ' ') { 644 optfound = TRUE; 645 } 646 tp++; 647 goto next_char; 648 } 649 } else { 650 /* start: this code shifted left for cstyle */ 651 char *backptr = tp - 7; 652 if (strncmp(backptr, "operator", 8) == 0) { 653 /* This is an overloaded operator */ 654 isoperator = TRUE; 655 if (c != ' ') { 656 optfound = TRUE; 657 } 658 659 tp++; 660 goto next_char; 661 } else if (c == '~') { 662 /* This is a destructor */ 663 tp++; 664 goto next_char; 665 } 666 /* end: above code shifted left for cstyle */ 667 } 668 } else if (c == ':') { 669 if ((*++sp = getc(inf)) == ':') { 670 tp += 2; 671 c = *sp; 672 goto next_char; 673 } else { 674 (void) ungetc (*sp, inf); 675 --sp; 676 } 677 } 678 679 /* start: this code shifted left for cstyle */ 680 { 681 int f; 682 int pfline = lineno; 683 684 if (start_entry(&sp, token, &f)) { 685 (void) strncpy(tok, token, tp-token+1); 686 tok[tp-token+1] = 0; 687 getline(tokftell); 688 pfnote(tok, pfline, f); 689 gotone = f; /* function */ 690 } 691 692 isoperator = optfound = midtoken = FALSE; 693 token = sp; 694 } 695 /* end: above code shifted left for cstyle */ 696 } else if (intoken(c)) 697 tp++; 698 } else if (begtoken(c)) { 699 token = tp = sp; 700 midtoken = TRUE; 701 tokftell = lineftell; 702 } 703 } 704 next_char: 705 if (c == ';' && tydef == end) /* clean with typedefs */ 706 tydef = none; 707 sp++; 708 /* The "c == }" was added to fix #1034126 */ 709 if (c == '\n' ||c == '}'|| sp > &line[sizeof (line) - BUFSIZ]) { 710 tp = token = sp = line; 711 number = gotone = midtoken = inquote = 712 inchar = isoperator = optfound = FALSE; 713 } 714 } 715 } 716 717 /* 718 * This routine checks to see if the current token is 719 * at the start of a function, or corresponds to a typedef 720 * It updates the input line * so that the '(' will be 721 * in it when it returns. 722 */ 723 static int 724 start_entry(lp, token, f) 725 char **lp, *token; 726 int *f; 727 { 728 char *sp; 729 int c; 730 static bool found; 731 bool firsttok; /* T if have seen first token in ()'s */ 732 int bad; 733 734 *f = 1; /* a function */ 735 sp = *lp; 736 c = *sp; 737 bad = FALSE; 738 if (!number) { /* space is not allowed in macro defs */ 739 while (iswhite(c)) { 740 *++sp = c = getc(inf); 741 if ((c == '\n') || (c == EOF)) { /* c==EOF, #1091005 */ 742 lineno++; 743 lineftell = ftell(inf); 744 if (sp > &line[sizeof (line) - BUFSIZ]) 745 goto ret; 746 } 747 } 748 /* the following tries to make it so that a #define a b(c) */ 749 /* doesn't count as a define of b. */ 750 } else { 751 if (strncmp(token, "define", 6) == 0) 752 found = 0; 753 else 754 found++; 755 if (found >= 2) { 756 gotone = TRUE; 757 badone: bad = TRUE; 758 goto ret; 759 } 760 } 761 /* check for the typedef cases */ 762 #ifdef XPG4 763 if (strncmp(token, "typedef", 7) == 0) { 764 #else /* !XPG4 */ 765 if (tflag && (strncmp(token, "typedef", 7) == 0)) { 766 #endif /* XPG4 */ 767 tydef = begin; 768 goto badone; 769 } 770 /* Handles 'class' besides 'struct' etc. */ 771 if (tydef == begin && ((strncmp(token, "struct", 6) == 0) || 772 (strncmp(token, "class", 5) == 0) || 773 (strncmp(token, "union", 5) == 0)|| 774 (strncmp(token, "enum", 4) == 0))) { 775 tydef = begin_rec; 776 goto badone; 777 } 778 if (tydef == begin) { 779 tydef = end; 780 goto badone; 781 } 782 if (tydef == begin_rec) { 783 tydef = begin_tag; 784 goto badone; 785 } 786 if (tydef == begin_tag) { 787 tydef = end; 788 goto gottydef; /* Fall through to "tydef==end" */ 789 } 790 791 gottydef: 792 if (tydef == end) { 793 *f = 0; 794 goto ret; 795 } 796 if (c != '(') 797 goto badone; 798 firsttok = FALSE; 799 while ((*++sp = c = getc(inf)) != ')') { 800 if ((c == '\n') || (c == EOF)) { /* c == EOF Fix for #1091005 */ 801 lineftell = ftell(inf); 802 lineno++; 803 if (sp > &line[sizeof (line) - BUFSIZ]) 804 goto ret; 805 } 806 /* 807 * This line used to confuse ctags: 808 * int (*oldhup)(); 809 * This fixes it. A nonwhite char before the first 810 * token, other than a / (in case of a comment in there) 811 * makes this not a declaration. 812 */ 813 if (begtoken(c) || c == '/') 814 firsttok = TRUE; 815 else if (!iswhite(c) && !firsttok) 816 goto badone; 817 } 818 while (iswhite(*++sp = c = getc(inf))) 819 if ((c == '\n') || (c == EOF)) { /* c == EOF fix for #1091005 */ 820 lineno++; 821 lineftell = ftell(inf); 822 if (sp > &line[sizeof (line) - BUFSIZ]) 823 break; 824 } 825 ret: 826 *lp = --sp; 827 if (c == '\n') 828 lineno--; 829 (void) ungetc(c, inf); 830 return (!bad && (!*f || isgood(c))); 831 /* hack for typedefs */ 832 } 833 834 /* 835 * Y_entries: 836 * Find the yacc tags and put them in. 837 */ 838 static void 839 Y_entries() 840 { 841 char *sp, *orig_sp; 842 int brace; 843 bool in_rule, toklen; 844 char tok[BUFSIZ]; 845 846 brace = 0; 847 getline(lineftell); 848 pfnote("yyparse", lineno, TRUE); 849 while (fgets(line, sizeof (line), inf) != NULL) 850 for (sp = line; *sp; sp++) 851 switch (*sp) { 852 case '\n': 853 lineno++; 854 /* FALLTHROUGH */ 855 case ' ': 856 case '\t': 857 case '\f': 858 case '\r': 859 break; 860 case '"': 861 do { 862 while (*++sp != '"') 863 continue; 864 } while (sp[-1] == '\\'); 865 break; 866 case '\'': 867 do { 868 while (*++sp != '\'') 869 continue; 870 } while (sp[-1] == '\\'); 871 break; 872 case '/': 873 if (*++sp == '*') 874 sp = toss_comment(sp); 875 else 876 --sp; 877 break; 878 case '{': 879 brace++; 880 break; 881 case '}': 882 brace--; 883 break; 884 case '%': 885 if (sp[1] == '%' && sp == line) 886 return; 887 break; 888 case '|': 889 case ';': 890 in_rule = FALSE; 891 break; 892 default: 893 if (brace == 0 && !in_rule && (isalpha(*sp) || 894 *sp == '.' || 895 *sp == '_')) { 896 orig_sp = sp; 897 ++sp; 898 while (isalnum(*sp) || *sp == '_' || 899 *sp == '.') 900 sp++; 901 toklen = sp - orig_sp; 902 while (isspace(*sp)) 903 sp++; 904 if (*sp == ':' || (*sp == '\0' && 905 first_char() == ':')) { 906 (void) strncpy(tok, 907 orig_sp, toklen); 908 tok[toklen] = '\0'; 909 (void) strcpy(lbuf, line); 910 lbuf[strlen(lbuf) - 1] = '\0'; 911 pfnote(tok, lineno, TRUE); 912 in_rule = TRUE; 913 } 914 else 915 sp--; 916 } 917 break; 918 } 919 } 920 921 static char * 922 toss_comment(start) 923 char *start; 924 { 925 char *sp; 926 927 /* 928 * first, see if the end-of-comment is on the same line 929 */ 930 do { 931 #ifdef __STDC__ 932 while ((sp = strchr(start, '*')) != NULL) 933 #else 934 while ((sp = index(start, '*')) != NULL) 935 #endif 936 if (sp[1] == '/') 937 return (++sp); 938 else 939 start = (++sp); 940 start = line; 941 lineno++; 942 } while (fgets(line, sizeof (line), inf) != NULL); 943 944 /* 945 * running this through lint revealed that the original version 946 * of this routine didn't explicitly return something; while 947 * the return value was always used!. so i've added this 948 * next line. 949 */ 950 return (sp); 951 } 952 953 static void 954 getline(where) 955 long int where; 956 { 957 long saveftell = ftell(inf); 958 char *cp; 959 960 (void) fseek(inf, where, 0); 961 (void) fgets(lbuf, sizeof (lbuf), inf); 962 #ifdef __STDC__ 963 cp = strrchr(lbuf, '\n'); 964 #else 965 cp = rindex(lbuf, '\n'); 966 #endif 967 if (cp) 968 *cp = 0; 969 (void) fseek(inf, saveftell, 0); 970 } 971 972 static void 973 free_tree(node) 974 NODE *node; 975 { 976 while (node) { 977 free_tree(node->right); 978 free(node); 979 node = node->left; 980 } 981 } 982 983 static void 984 add_node(node, cur_node) 985 NODE *node, *cur_node; 986 { 987 int dif; 988 989 dif = strcmp(node->entry, cur_node->entry); 990 if (dif == 0) { 991 if (node->file == cur_node->file) { 992 if (!wflag) { 993 (void) fprintf(stderr, 994 gettext("Duplicate entry in file %s, line %d: %s\n"), 995 node->file, lineno, node->entry); 996 (void) fprintf(stderr, 997 gettext("Second entry ignored\n")); 998 } 999 return; 1000 } 1001 if (!cur_node->been_warned) 1002 if (!wflag) { 1003 (void) fprintf(stderr, gettext("Duplicate " 1004 "entry in files %s and %s: %s " 1005 "(Warning only)\n"), 1006 node->file, cur_node->file, 1007 node->entry); 1008 } 1009 cur_node->been_warned = TRUE; 1010 return; 1011 } 1012 1013 if (dif < 0) { 1014 if (cur_node->left != NULL) 1015 add_node(node, cur_node->left); 1016 else 1017 cur_node->left = node; 1018 return; 1019 } 1020 if (cur_node->right != NULL) 1021 add_node(node, cur_node->right); 1022 else 1023 cur_node->right = node; 1024 } 1025 1026 static void 1027 put_entries(node) 1028 NODE *node; 1029 { 1030 char *sp; 1031 1032 if (node == NULL) 1033 return; 1034 put_entries(node->left); 1035 1036 /* 1037 * while the code in the following #ifdef section could be combined, 1038 * it's explicitly separated here to make maintainance easier. 1039 */ 1040 #ifdef XPG4 1041 /* 1042 * POSIX 2003: we no longer have a "-t" flag; the logic is 1043 * automatically assumed to be "turned on" here. 1044 */ 1045 if (xflag == 0) { 1046 (void) fprintf(outf, "%s\t%s\t%c^", 1047 node->entry, node->file, searchar); 1048 for (sp = node->pat; *sp; sp++) 1049 if (*sp == '\\') 1050 (void) fprintf(outf, "\\\\"); 1051 else if (*sp == searchar) 1052 (void) fprintf(outf, "\\%c", searchar); 1053 else 1054 (void) putc(*sp, outf); 1055 (void) fprintf(outf, "%c\n", searchar); 1056 } else if (vflag) 1057 (void) fprintf(stdout, "%s %s %d\n", 1058 node->entry, node->file, (node->lno+63)/64); 1059 else 1060 (void) fprintf(stdout, "%-16s %4d %-16s %s\n", 1061 node->entry, node->lno, node->file, node->pat); 1062 #else /* XPG4 */ 1063 /* 1064 * original way of doing things. "-t" logic is only turned on 1065 * when the user has specified it via a command-line argument. 1066 */ 1067 if (xflag == 0) 1068 if (node->f) { /* a function */ 1069 (void) fprintf(outf, "%s\t%s\t%c^", 1070 node->entry, node->file, searchar); 1071 for (sp = node->pat; *sp; sp++) 1072 if (*sp == '\\') 1073 (void) fprintf(outf, "\\\\"); 1074 else if (*sp == searchar) 1075 (void) fprintf(outf, "\\%c", searchar); 1076 else 1077 (void) putc(*sp, outf); 1078 (void) fprintf(outf, "%c\n", searchar); 1079 } else { /* a typedef; text pattern inadequate */ 1080 (void) fprintf(outf, "%s\t%s\t%d\n", 1081 node->entry, node->file, node->lno); 1082 } else if (vflag) 1083 (void) fprintf(stdout, "%s %s %d\n", 1084 node->entry, node->file, (node->lno+63)/64); 1085 else 1086 (void) fprintf(stdout, "%-16s %4d %-16s %s\n", 1087 node->entry, node->lno, node->file, node->pat); 1088 #endif /* XPG4 */ 1089 put_entries(node->right); 1090 } 1091 1092 1093 static int 1094 PF_funcs(fi) 1095 FILE *fi; 1096 { 1097 1098 pfcnt = 0; 1099 while (fgets(lbuf, sizeof (lbuf), fi)) { 1100 lineno++; 1101 dbp = lbuf; 1102 if (*dbp == '%') dbp++; /* Ratfor escape to fortran */ 1103 while (isspace(*dbp)) 1104 dbp++; 1105 if (*dbp == 0) 1106 continue; 1107 switch (*dbp |' ') { 1108 1109 case 'i': 1110 if (tail("integer")) 1111 takeprec(); 1112 break; 1113 case 'r': 1114 if (tail("real")) 1115 takeprec(); 1116 break; 1117 case 'l': 1118 if (tail("logical")) 1119 takeprec(); 1120 break; 1121 case 'c': 1122 if (tail("complex") || tail("character")) 1123 takeprec(); 1124 break; 1125 case 'd': 1126 if (tail("double")) { 1127 while (isspace(*dbp)) 1128 dbp++; 1129 if (*dbp == 0) 1130 continue; 1131 if (tail("precision")) 1132 break; 1133 continue; 1134 } 1135 break; 1136 } 1137 while (isspace(*dbp)) 1138 dbp++; 1139 if (*dbp == 0) 1140 continue; 1141 switch (*dbp|' ') { 1142 1143 case 'f': 1144 if (tail("function")) 1145 getit(); 1146 continue; 1147 case 's': 1148 if (tail("subroutine")) 1149 getit(); 1150 continue; 1151 case 'p': 1152 if (tail("program")) { 1153 getit(); 1154 continue; 1155 } 1156 if (tail("procedure")) 1157 getit(); 1158 continue; 1159 } 1160 } 1161 return (pfcnt); 1162 } 1163 1164 static int 1165 tail(cp) 1166 char *cp; 1167 { 1168 int len = 0; 1169 1170 while (*cp && (*cp&~' ') == ((*(dbp+len))&~' ')) 1171 cp++, len++; 1172 if (*cp == 0) { 1173 dbp += len; 1174 return (1); 1175 } 1176 return (0); 1177 } 1178 1179 static void 1180 takeprec() 1181 { 1182 1183 while (isspace(*dbp)) 1184 dbp++; 1185 if (*dbp != '*') 1186 return; 1187 dbp++; 1188 while (isspace(*dbp)) 1189 dbp++; 1190 if (!isdigit(*dbp)) { 1191 --dbp; /* force failure */ 1192 return; 1193 } 1194 do 1195 dbp++; 1196 while (isdigit(*dbp)); 1197 } 1198 1199 static void 1200 getit() 1201 { 1202 char *cp; 1203 char c; 1204 char nambuf[BUFSIZ]; 1205 1206 for (cp = lbuf; *cp; cp++) 1207 ; 1208 *--cp = 0; /* zap newline */ 1209 while (isspace(*dbp)) 1210 dbp++; 1211 if (*dbp == 0 || !isalpha(*dbp) || !isascii(*dbp)) 1212 return; 1213 for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++) 1214 continue; 1215 c = cp[0]; 1216 cp[0] = 0; 1217 (void) strcpy(nambuf, dbp); 1218 cp[0] = c; 1219 pfnote(nambuf, lineno, TRUE); 1220 pfcnt++; 1221 } 1222 1223 static char * 1224 savestr(cp) 1225 char *cp; 1226 { 1227 int len; 1228 char *dp; 1229 1230 len = strlen(cp); 1231 dp = (char *)malloc(len+1); 1232 (void) strcpy(dp, cp); 1233 1234 return (dp); 1235 } 1236 1237 #ifndef __STDC__ 1238 /* 1239 * Return the ptr in sp at which the character c last 1240 * appears; NULL if not found 1241 * 1242 * Identical to v7 rindex, included for portability. 1243 */ 1244 1245 static char * 1246 rindex(sp, c) 1247 char *sp, c; 1248 { 1249 char *r; 1250 1251 r = NULL; 1252 do { 1253 if (*sp == c) 1254 r = sp; 1255 } while (*sp++); 1256 return (r); 1257 } 1258 #endif 1259 1260 /* 1261 * lisp tag functions 1262 * just look for (def or (DEF 1263 */ 1264 1265 static void 1266 L_funcs(fi) 1267 FILE *fi; 1268 { 1269 int special; 1270 1271 pfcnt = 0; 1272 while (fgets(lbuf, sizeof (lbuf), fi)) { 1273 lineno++; 1274 dbp = lbuf; 1275 if (dbp[0] == '(' && 1276 (dbp[1] == 'D' || dbp[1] == 'd') && 1277 (dbp[2] == 'E' || dbp[2] == 'e') && 1278 (dbp[3] == 'F' || dbp[3] == 'f')) { 1279 dbp += 4; 1280 if (striccmp(dbp, "method") == 0 || 1281 striccmp(dbp, "wrapper") == 0 || 1282 striccmp(dbp, "whopper") == 0) 1283 special = TRUE; 1284 else 1285 special = FALSE; 1286 while (!isspace(*dbp)) 1287 dbp++; 1288 while (isspace(*dbp)) 1289 dbp++; 1290 L_getit(special); 1291 } 1292 } 1293 } 1294 1295 static void 1296 L_getit(special) 1297 int special; 1298 { 1299 char *cp; 1300 char c; 1301 char nambuf[BUFSIZ]; 1302 1303 for (cp = lbuf; *cp; cp++) 1304 continue; 1305 *--cp = 0; /* zap newline */ 1306 if (*dbp == 0) 1307 return; 1308 if (special) { 1309 #ifdef __STDC__ 1310 if ((cp = strchr(dbp, ')')) == NULL) 1311 #else 1312 if ((cp = index(dbp, ')')) == NULL) 1313 #endif 1314 return; 1315 while (cp >= dbp && *cp != ':') 1316 cp--; 1317 if (cp < dbp) 1318 return; 1319 dbp = cp; 1320 while (*cp && *cp != ')' && *cp != ' ') 1321 cp++; 1322 } 1323 else 1324 for (cp = dbp + 1; *cp && *cp != '(' && *cp != ' '; cp++) 1325 continue; 1326 c = cp[0]; 1327 cp[0] = 0; 1328 (void) strcpy(nambuf, dbp); 1329 cp[0] = c; 1330 pfnote(nambuf, lineno, TRUE); 1331 pfcnt++; 1332 } 1333 1334 /* 1335 * striccmp: 1336 * Compare two strings over the length of the second, ignoring 1337 * case distinctions. If they are the same, return 0. If they 1338 * are different, return the difference of the first two different 1339 * characters. It is assumed that the pattern (second string) is 1340 * completely lower case. 1341 */ 1342 static int 1343 striccmp(str, pat) 1344 char *str, *pat; 1345 { 1346 int c1; 1347 1348 while (*pat) { 1349 if (isupper(*str)) 1350 c1 = tolower(*str); 1351 else 1352 c1 = *str; 1353 if (c1 != *pat) 1354 return (c1 - *pat); 1355 pat++; 1356 str++; 1357 } 1358 return (0); 1359 } 1360 1361 /* 1362 * first_char: 1363 * Return the first non-blank character in the file. After 1364 * finding it, rewind the input file so we start at the beginning 1365 * again. 1366 */ 1367 static int 1368 first_char() 1369 { 1370 int c; 1371 long off; 1372 1373 off = ftell(inf); 1374 while ((c = getc(inf)) != EOF) 1375 if (!isspace(c) && c != '\r') { 1376 (void) fseek(inf, off, 0); 1377 return (c); 1378 } 1379 (void) fseek(inf, off, 0); 1380 return (EOF); 1381 } 1382 1383 /* 1384 * toss_yysec: 1385 * Toss away code until the next "%%" line. 1386 */ 1387 static void 1388 toss_yysec() 1389 { 1390 char buf[BUFSIZ]; 1391 1392 for (;;) { 1393 lineftell = ftell(inf); 1394 if (fgets(buf, BUFSIZ, inf) == NULL) 1395 return; 1396 lineno++; 1397 if (strncmp(buf, "%%", 2) == 0) 1398 return; 1399 } 1400 } 1401 1402 static void 1403 Usage() 1404 { 1405 #ifdef XPG4 1406 (void) fprintf(stderr, gettext("Usage:\tctags [-aBFuvw] " 1407 #else /* !XPG4 */ 1408 (void) fprintf(stderr, gettext("Usage:\tctags [-aBFtuvw] " 1409 #endif /* XPG4 */ 1410 "[-f tagsfile] file ...\n")); 1411 (void) fprintf(stderr, gettext("OR:\tctags [-x] file ...\n")); 1412 exit(1); 1413 } 1414 1415 1416 /* 1417 * parseargs(): modify the args 1418 * the purpose of this routine is to transform any ancient argument 1419 * usage into a format which is acceptable to getopt(3C), so that we 1420 * retain backwards Solaris 2.[0-4] compatibility. 1421 * 1422 * This routine allows us to make full use of getopts, without any 1423 * funny argument processing in main(). 1424 * 1425 * The other alternative would be to hand-craft the processed arguments 1426 * during and after getopt(3C) - which usually leads to uglier code 1427 * in main(). I've opted to keep the ugliness isolated down here, 1428 * instead of in main(). 1429 * 1430 * In a nutshell, if the user has used the old Solaris syntax of: 1431 * ctags [-aBFtuvwx] [-f tagsfile] filename ... 1432 * We simply change this into: 1433 * ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file... 1434 * 1435 * If the user has specified the new getopt(3C) syntax, we merely 1436 * copy that into our modified argument space. 1437 */ 1438 static void 1439 parseargs(ac, av) 1440 int ac; /* argument count */ 1441 char **av; /* ptr to original argument space */ 1442 { 1443 int i; /* current argument */ 1444 int a; /* used to parse combined arguments */ 1445 int fflag; /* 1 = we're only parsing filenames */ 1446 size_t sz; /* size of the argument */ 1447 size_t mav_sz; /* size of our psuedo argument space */ 1448 1449 i = mac = fflag = 0; /* proper initializations */ 1450 1451 mav_sz = ((ac + 1) * sizeof (char *)); 1452 if ((mav = malloc(mav_sz)) == (char **)NULL) { 1453 perror("Can't malloc argument space"); 1454 exit(1); 1455 } 1456 1457 /* for each argument, see if we need to change things: */ 1458 for (; (av[i] != (char *)NULL) && (av[i][0] != (char)NULL); i++) { 1459 1460 if (strcmp(av[i], "--") == 0) { 1461 fflag = 1; /* just handle filenames now */ 1462 } 1463 1464 sz = strlen(&av[i][0]); /* get this arg's size */ 1465 1466 /* 1467 * if the argument starts with a "-", and has more than 1468 * 1 flag, then we have to search through each character, 1469 * and separate any flags which have been combined. 1470 * 1471 * so, if we've found a "-" string which needs separating: 1472 */ 1473 if (fflag == 0 && /* not handling filename args */ 1474 av[i][0] == '-' && /* and this is a flag */ 1475 sz > 2) { /* and there's more than 1 flag */ 1476 /* then for each flag after the "-" sign: */ 1477 for (a = 1; av[i][a]; a++) { 1478 /* copy the flag into mav space. */ 1479 if (a > 1) { 1480 /* 1481 * we need to call realloc() after the 1482 * 1st combined flag, because "ac" 1483 * doesn't include combined args. 1484 */ 1485 mav_sz += sizeof (char *); 1486 if ((mav = realloc(mav, mav_sz)) == 1487 (char **)NULL) { 1488 perror("Can't realloc " 1489 "argument space"); 1490 exit(1); 1491 } 1492 } 1493 1494 if ((mav[mac] = malloc((size_t)CPFLAG)) == 1495 (char *)NULL) { 1496 perror("Can't malloc argument space"); 1497 exit(1); 1498 } 1499 (void) sprintf(mav[mac], "-%c", av[i][a]); 1500 ++mac; 1501 } 1502 } else { 1503 /* otherwise, just copy the argument: */ 1504 if ((mav[mac] = malloc(sz + 1)) == (char *)NULL) { 1505 perror("Can't malloc argument space"); 1506 exit(1); 1507 } 1508 (void) strcpy(mav[mac], av[i]); 1509 ++mac; 1510 } 1511 } 1512 1513 mav[mac] = (char *)NULL; 1514 } 1515