1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1987, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #include <limits.h> 34 #include <stddef.h> 35 #include <stdio.h> 36 #include <string.h> 37 38 #include "ctags.h" 39 40 static bool func_entry(void); 41 static void hash_entry(void); 42 static void skip_string(int); 43 static bool str_entry(int); 44 45 /* 46 * c_entries -- 47 * read .c and .h files and call appropriate routines 48 */ 49 void 50 c_entries(void) 51 { 52 int c; /* current character */ 53 int level; /* brace level */ 54 int token; /* if reading a token */ 55 bool t_def; /* if reading a typedef */ 56 int t_level; /* typedef's brace level */ 57 char *sp; /* buffer pointer */ 58 char tok[MAXTOKEN]; /* token buffer */ 59 60 lineftell = ftell(inf); 61 sp = tok; token = t_def = false; t_level = -1; level = 0; lineno = 1; 62 while (GETC(!=, EOF)) { 63 switch (c) { 64 /* 65 * Here's where it DOESN'T handle: { 66 * foo(a) 67 * { 68 * #ifdef notdef 69 * } 70 * #endif 71 * if (a) 72 * puts("hello, world"); 73 * } 74 */ 75 case '{': 76 ++level; 77 goto endtok; 78 case '}': 79 /* 80 * if level goes below zero, try and fix 81 * it, even though we've already messed up 82 */ 83 if (--level < 0) 84 level = 0; 85 goto endtok; 86 87 case '\n': 88 SETLINE; 89 /* 90 * the above 3 cases are similar in that they 91 * are special characters that also end tokens. 92 */ 93 endtok: if (sp > tok) { 94 *sp = EOS; 95 token = true; 96 sp = tok; 97 } 98 else 99 token = false; 100 continue; 101 102 /* 103 * We ignore quoted strings and character constants 104 * completely. 105 */ 106 case '"': 107 case '\'': 108 skip_string(c); 109 break; 110 111 /* 112 * comments can be fun; note the state is unchanged after 113 * return, in case we found: 114 * "foo() XX comment XX { int bar; }" 115 */ 116 case '/': 117 if (GETC(==, '*') || c == '/') { 118 skip_comment(c); 119 continue; 120 } 121 (void)ungetc(c, inf); 122 c = '/'; 123 goto storec; 124 125 /* hash marks flag #define's. */ 126 case '#': 127 if (sp == tok) { 128 hash_entry(); 129 break; 130 } 131 goto storec; 132 133 /* 134 * if we have a current token, parenthesis on 135 * level zero indicates a function. 136 */ 137 case '(': 138 if (!level && token) { 139 int curline; 140 141 if (sp != tok) 142 *sp = EOS; 143 /* 144 * grab the line immediately, we may 145 * already be wrong, for example, 146 * foo\n 147 * (arg1, 148 */ 149 get_line(); 150 curline = lineno; 151 if (func_entry()) { 152 ++level; 153 pfnote(tok, curline); 154 } 155 break; 156 } 157 goto storec; 158 159 /* 160 * semi-colons indicate the end of a typedef; if we find a 161 * typedef we search for the next semi-colon of the same 162 * level as the typedef. Ignoring "structs", they are 163 * tricky, since you can find: 164 * 165 * "typedef long time_t;" 166 * "typedef unsigned int u_int;" 167 * "typedef unsigned int u_int [10];" 168 * 169 * If looking at a typedef, we save a copy of the last token 170 * found. Then, when we find the ';' we take the current 171 * token if it starts with a valid token name, else we take 172 * the one we saved. There's probably some reasonable 173 * alternative to this... 174 */ 175 case ';': 176 if (t_def && level == t_level) { 177 t_def = false; 178 get_line(); 179 if (sp != tok) 180 *sp = EOS; 181 pfnote(tok, lineno); 182 break; 183 } 184 goto storec; 185 186 /* 187 * store characters until one that can't be part of a token 188 * comes along; check the current token against certain 189 * reserved words. 190 */ 191 default: 192 /* ignore whitespace */ 193 if (c == ' ' || c == '\t') { 194 int save = c; 195 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 196 ; 197 if (c == EOF) 198 return; 199 (void)ungetc(c, inf); 200 c = save; 201 } 202 storec: if (!intoken(c)) { 203 if (sp == tok) 204 break; 205 *sp = EOS; 206 if (tflag) { 207 /* no typedefs inside typedefs */ 208 if (!t_def && 209 !memcmp(tok, "typedef",8)) { 210 t_def = true; 211 t_level = level; 212 break; 213 } 214 /* catch "typedef struct" */ 215 if ((!t_def || t_level < level) 216 && (!memcmp(tok, "struct", 7) 217 || !memcmp(tok, "union", 6) 218 || !memcmp(tok, "enum", 5))) { 219 /* 220 * get line immediately; 221 * may change before '{' 222 */ 223 get_line(); 224 if (str_entry(c)) 225 ++level; 226 break; 227 /* } */ 228 } 229 } 230 sp = tok; 231 } 232 else if (sp != tok || begtoken(c)) { 233 if (sp == tok + sizeof tok - 1) 234 /* Too long -- truncate it */ 235 *sp = EOS; 236 else 237 *sp++ = c; 238 token = true; 239 } 240 continue; 241 } 242 243 sp = tok; 244 token = false; 245 } 246 } 247 248 /* 249 * func_entry -- 250 * handle a function reference 251 */ 252 static bool 253 func_entry(void) 254 { 255 int c; /* current character */ 256 int level = 0; /* for matching '()' */ 257 static char attribute[] = "__attribute__"; 258 char maybe_attribute[sizeof attribute + 1], 259 *anext; 260 261 /* 262 * Find the end of the assumed function declaration. 263 * Note that ANSI C functions can have type definitions so keep 264 * track of the parentheses nesting level. 265 */ 266 while (GETC(!=, EOF)) { 267 switch (c) { 268 case '\'': 269 case '"': 270 /* skip strings and character constants */ 271 skip_string(c); 272 break; 273 case '/': 274 /* skip comments */ 275 if (GETC(==, '*') || c == '/') 276 skip_comment(c); 277 break; 278 case '(': 279 level++; 280 break; 281 case ')': 282 if (level == 0) 283 goto fnd; 284 level--; 285 break; 286 case '\n': 287 SETLINE; 288 } 289 } 290 return (false); 291 fnd: 292 /* 293 * we assume that the character after a function's right paren 294 * is a token character if it's a function and a non-token 295 * character if it's a declaration. Comments don't count... 296 */ 297 for (anext = maybe_attribute;;) { 298 while (GETC(!=, EOF) && iswhite(c)) 299 if (c == '\n') 300 SETLINE; 301 if (c == EOF) 302 return false; 303 /* 304 * Recognize the gnu __attribute__ extension, which would 305 * otherwise make the heuristic test DTWT 306 */ 307 if (anext == maybe_attribute) { 308 if (intoken(c)) { 309 *anext++ = c; 310 continue; 311 } 312 } else { 313 if (intoken(c)) { 314 if (anext - maybe_attribute 315 < (ptrdiff_t)(sizeof attribute - 1)) 316 *anext++ = c; 317 else break; 318 continue; 319 } else { 320 *anext++ = '\0'; 321 if (strcmp(maybe_attribute, attribute) == 0) { 322 (void)ungetc(c, inf); 323 return false; 324 } 325 break; 326 } 327 } 328 if (intoken(c) || c == '{') 329 break; 330 if (c == '/' && (GETC(==, '*') || c == '/')) 331 skip_comment(c); 332 else { /* don't ever "read" '/' */ 333 (void)ungetc(c, inf); 334 return (false); 335 } 336 } 337 if (c != '{') 338 (void)skip_key('{'); 339 return (true); 340 } 341 342 /* 343 * hash_entry -- 344 * handle a line starting with a '#' 345 */ 346 static void 347 hash_entry(void) 348 { 349 int c; /* character read */ 350 int curline; /* line started on */ 351 char *sp; /* buffer pointer */ 352 char tok[MAXTOKEN]; /* storage buffer */ 353 354 /* ignore leading whitespace */ 355 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 356 ; 357 (void)ungetc(c, inf); 358 359 curline = lineno; 360 for (sp = tok;;) { /* get next token */ 361 if (GETC(==, EOF)) 362 return; 363 if (iswhite(c)) 364 break; 365 if (sp == tok + sizeof tok - 1) 366 /* Too long -- truncate it */ 367 *sp = EOS; 368 else 369 *sp++ = c; 370 } 371 *sp = EOS; 372 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 373 goto skip; 374 for (;;) { /* this doesn't handle "#define \n" */ 375 if (GETC(==, EOF)) 376 return; 377 if (!iswhite(c)) 378 break; 379 } 380 for (sp = tok;;) { /* get next token */ 381 if (sp == tok + sizeof tok - 1) 382 /* Too long -- truncate it */ 383 *sp = EOS; 384 else 385 *sp++ = c; 386 if (GETC(==, EOF)) 387 return; 388 /* 389 * this is where it DOESN'T handle 390 * "#define \n" 391 */ 392 if (!intoken(c)) 393 break; 394 } 395 *sp = EOS; 396 if (dflag || c == '(') { /* only want macros */ 397 get_line(); 398 pfnote(tok, curline); 399 } 400 skip: if (c == '\n') { /* get rid of rest of define */ 401 SETLINE 402 if (*(sp - 1) != '\\') 403 return; 404 } 405 (void)skip_key('\n'); 406 } 407 408 /* 409 * str_entry -- 410 * handle a struct, union or enum entry 411 */ 412 static bool 413 str_entry(int c) /* c is current character */ 414 { 415 int curline; /* line started on */ 416 char *sp; /* buffer pointer */ 417 char tok[LINE_MAX]; /* storage buffer */ 418 419 curline = lineno; 420 while (iswhite(c)) 421 if (GETC(==, EOF)) 422 return (false); 423 if (c == '{') /* it was "struct {" */ 424 return (true); 425 for (sp = tok;;) { /* get next token */ 426 if (sp == tok + sizeof tok - 1) 427 /* Too long -- truncate it */ 428 *sp = EOS; 429 else 430 *sp++ = c; 431 if (GETC(==, EOF)) 432 return (false); 433 if (!intoken(c)) 434 break; 435 } 436 switch (c) { 437 case '{': /* it was "struct foo{" */ 438 --sp; 439 break; 440 case '\n': /* it was "struct foo\n" */ 441 SETLINE; 442 /*FALLTHROUGH*/ 443 default: /* probably "struct foo " */ 444 while (GETC(!=, EOF)) 445 if (!iswhite(c)) 446 break; 447 if (c != '{') { 448 (void)ungetc(c, inf); 449 return (false); 450 } 451 } 452 *sp = EOS; 453 pfnote(tok, curline); 454 return (true); 455 } 456 457 /* 458 * skip_comment -- 459 * skip over comment 460 */ 461 void 462 skip_comment(int t) /* t is comment character */ 463 { 464 int c; /* character read */ 465 int star; /* '*' flag */ 466 467 for (star = 0; GETC(!=, EOF);) 468 switch(c) { 469 /* comments don't nest, nor can they be escaped. */ 470 case '*': 471 star = true; 472 break; 473 case '/': 474 if (star && t == '*') 475 return; 476 break; 477 case '\n': 478 SETLINE; 479 if (t == '/') 480 return; 481 /*FALLTHROUGH*/ 482 default: 483 star = false; 484 break; 485 } 486 } 487 488 /* 489 * skip_string -- 490 * skip to the end of a string or character constant. 491 */ 492 void 493 skip_string(int key) 494 { 495 int c, 496 skip; 497 498 for (skip = false; GETC(!=, EOF); ) 499 switch (c) { 500 case '\\': /* a backslash escapes anything */ 501 skip = !skip; /* we toggle in case it's "\\" */ 502 break; 503 case '\n': 504 SETLINE; 505 /*FALLTHROUGH*/ 506 default: 507 if (c == key && !skip) 508 return; 509 skip = false; 510 } 511 } 512 513 /* 514 * skip_key -- 515 * skip to next char "key" 516 */ 517 bool 518 skip_key(int key) 519 { 520 int c; 521 bool skip; 522 bool retval; 523 524 for (skip = retval = false; GETC(!=, EOF);) 525 switch(c) { 526 case '\\': /* a backslash escapes anything */ 527 skip = !skip; /* we toggle in case it's "\\" */ 528 break; 529 case ';': /* special case for yacc; if one */ 530 case '|': /* of these chars occurs, we may */ 531 retval = true; /* have moved out of the rule */ 532 break; /* not used by C */ 533 case '\'': 534 case '"': 535 /* skip strings and character constants */ 536 skip_string(c); 537 break; 538 case '/': 539 /* skip comments */ 540 if (GETC(==, '*') || c == '/') { 541 skip_comment(c); 542 break; 543 } 544 (void)ungetc(c, inf); 545 c = '/'; 546 goto norm; 547 case '\n': 548 SETLINE; 549 /*FALLTHROUGH*/ 550 default: 551 norm: 552 if (c == key && !skip) 553 return (retval); 554 skip = false; 555 } 556 return (retval); 557 } 558