1 /* 2 * Do C preprocessing, based on a token list gathered by 3 * the tokenizer. 4 * 5 * This may not be the smartest preprocessor on the planet. 6 * 7 * Copyright (C) 2003 Transmeta Corp. 8 * 2003-2004 Linus Torvalds 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 */ 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <stdarg.h> 31 #include <stddef.h> 32 #include <string.h> 33 #include <ctype.h> 34 #include <unistd.h> 35 #include <fcntl.h> 36 #include <limits.h> 37 #include <time.h> 38 #include <dirent.h> 39 #include <sys/stat.h> 40 41 #include "lib.h" 42 #include "allocate.h" 43 #include "parse.h" 44 #include "token.h" 45 #include "symbol.h" 46 #include "expression.h" 47 #include "scope.h" 48 49 static struct ident_list *macros; // only needed for -dD 50 static int false_nesting = 0; 51 static int counter_macro = 0; // __COUNTER__ expansion 52 53 #define INCLUDEPATHS 300 54 const char *includepath[INCLUDEPATHS+1] = { 55 "", 56 "/usr/include", 57 "/usr/local/include", 58 NULL 59 }; 60 61 static const char **quote_includepath = includepath; 62 static const char **angle_includepath = includepath + 1; 63 static const char **isys_includepath = includepath + 1; 64 static const char **sys_includepath = includepath + 1; 65 static const char **dirafter_includepath = includepath + 3; 66 67 #define dirty_stream(stream) \ 68 do { \ 69 if (!stream->dirty) { \ 70 stream->dirty = 1; \ 71 if (!stream->ifndef) \ 72 stream->protect = NULL; \ 73 } \ 74 } while(0) 75 76 #define end_group(stream) \ 77 do { \ 78 if (stream->ifndef == stream->top_if) { \ 79 stream->ifndef = NULL; \ 80 if (!stream->dirty) \ 81 stream->protect = NULL; \ 82 else if (stream->protect) \ 83 stream->dirty = 0; \ 84 } \ 85 } while(0) 86 87 #define nesting_error(stream) \ 88 do { \ 89 stream->dirty = 1; \ 90 stream->ifndef = NULL; \ 91 stream->protect = NULL; \ 92 } while(0) 93 94 static struct token *alloc_token(struct position *pos) 95 { 96 struct token *token = __alloc_token(0); 97 98 token->pos.stream = pos->stream; 99 token->pos.line = pos->line; 100 token->pos.pos = pos->pos; 101 token->pos.whitespace = 1; 102 return token; 103 } 104 105 /* Expand symbol 'sym' at '*list' */ 106 static int expand(struct token **, struct symbol *); 107 108 static void replace_with_string(struct token *token, const char *str) 109 { 110 int size = strlen(str) + 1; 111 struct string *s = __alloc_string(size); 112 113 s->length = size; 114 memcpy(s->data, str, size); 115 token_type(token) = TOKEN_STRING; 116 token->string = s; 117 } 118 119 static void replace_with_integer(struct token *token, unsigned int val) 120 { 121 char *buf = __alloc_bytes(11); 122 sprintf(buf, "%u", val); 123 token_type(token) = TOKEN_NUMBER; 124 token->number = buf; 125 } 126 127 static struct symbol *lookup_macro(struct ident *ident) 128 { 129 struct symbol *sym = lookup_symbol(ident, NS_MACRO | NS_UNDEF); 130 if (sym && sym->namespace != NS_MACRO) 131 sym = NULL; 132 return sym; 133 } 134 135 static int token_defined(struct token *token) 136 { 137 if (token_type(token) == TOKEN_IDENT) { 138 struct symbol *sym = lookup_macro(token->ident); 139 if (sym) { 140 sym->used_in = file_scope; 141 return 1; 142 } 143 return 0; 144 } 145 146 sparse_error(token->pos, "expected preprocessor identifier"); 147 return 0; 148 } 149 150 static void replace_with_defined(struct token *token) 151 { 152 static const char *string[] = { "0", "1" }; 153 int defined = token_defined(token); 154 155 token_type(token) = TOKEN_NUMBER; 156 token->number = string[defined]; 157 } 158 159 static int expand_one_symbol(struct token **list) 160 { 161 struct token *token = *list; 162 struct symbol *sym; 163 static char buffer[12]; /* __DATE__: 3 + ' ' + 2 + ' ' + 4 + '\0' */ 164 static time_t t = 0; 165 166 if (token->pos.noexpand) 167 return 1; 168 169 sym = lookup_macro(token->ident); 170 if (sym) { 171 store_macro_pos(token); 172 sym->used_in = file_scope; 173 return expand(list, sym); 174 } 175 if (token->ident == &__LINE___ident) { 176 replace_with_integer(token, token->pos.line); 177 } else if (token->ident == &__FILE___ident) { 178 replace_with_string(token, stream_name(token->pos.stream)); 179 } else if (token->ident == &__DATE___ident) { 180 if (!t) 181 time(&t); 182 strftime(buffer, 12, "%b %e %Y", localtime(&t)); 183 replace_with_string(token, buffer); 184 } else if (token->ident == &__TIME___ident) { 185 if (!t) 186 time(&t); 187 strftime(buffer, 9, "%T", localtime(&t)); 188 replace_with_string(token, buffer); 189 } else if (token->ident == &__COUNTER___ident) { 190 replace_with_integer(token, counter_macro++); 191 } 192 return 1; 193 } 194 195 static inline struct token *scan_next(struct token **where) 196 { 197 struct token *token = *where; 198 if (token_type(token) != TOKEN_UNTAINT) 199 return token; 200 do { 201 token->ident->tainted = 0; 202 token = token->next; 203 } while (token_type(token) == TOKEN_UNTAINT); 204 *where = token; 205 return token; 206 } 207 208 static void expand_list(struct token **list) 209 { 210 struct token *next; 211 while (!eof_token(next = scan_next(list))) { 212 if (token_type(next) != TOKEN_IDENT || expand_one_symbol(list)) 213 list = &next->next; 214 } 215 } 216 217 static void preprocessor_line(struct stream *stream, struct token **line); 218 219 static struct token *collect_arg(struct token *prev, int vararg, struct position *pos, int count) 220 { 221 struct stream *stream = input_streams + prev->pos.stream; 222 struct token **p = &prev->next; 223 struct token *next; 224 int nesting = 0; 225 226 while (!eof_token(next = scan_next(p))) { 227 if (next->pos.newline && match_op(next, '#')) { 228 if (!next->pos.noexpand) { 229 sparse_error(next->pos, 230 "directive in argument list"); 231 preprocessor_line(stream, p); 232 __free_token(next); /* Free the '#' token */ 233 continue; 234 } 235 } 236 switch (token_type(next)) { 237 case TOKEN_STREAMEND: 238 case TOKEN_STREAMBEGIN: 239 *p = &eof_token_entry; 240 return next; 241 case TOKEN_STRING: 242 case TOKEN_WIDE_STRING: 243 if (count > 1) 244 next->string->immutable = 1; 245 break; 246 } 247 if (false_nesting) { 248 *p = next->next; 249 __free_token(next); 250 continue; 251 } 252 if (match_op(next, '(')) { 253 nesting++; 254 } else if (match_op(next, ')')) { 255 if (!nesting--) 256 break; 257 } else if (match_op(next, ',') && !nesting && !vararg) { 258 break; 259 } 260 next->pos.stream = pos->stream; 261 next->pos.line = pos->line; 262 next->pos.pos = pos->pos; 263 p = &next->next; 264 } 265 *p = &eof_token_entry; 266 return next; 267 } 268 269 /* 270 * We store arglist as <counter> [arg1] <number of uses for arg1> ... eof 271 */ 272 273 struct arg { 274 struct token *arg; 275 struct token *expanded; 276 struct token *str; 277 int n_normal; 278 int n_quoted; 279 int n_str; 280 }; 281 282 static int collect_arguments(struct token *start, struct token *arglist, struct arg *args, struct token *what) 283 { 284 int wanted = arglist->count.normal; 285 struct token *next = NULL; 286 int count = 0; 287 288 arglist = arglist->next; /* skip counter */ 289 290 if (!wanted) { 291 next = collect_arg(start, 0, &what->pos, 0); 292 if (eof_token(next)) 293 goto Eclosing; 294 if (!eof_token(start->next) || !match_op(next, ')')) { 295 count++; 296 goto Emany; 297 } 298 } else { 299 for (count = 0; count < wanted; count++) { 300 struct argcount *p = &arglist->next->count; 301 next = collect_arg(start, p->vararg, &what->pos, p->normal); 302 if (eof_token(next)) 303 goto Eclosing; 304 if (p->vararg && wanted == 1 && eof_token(start->next)) 305 break; 306 arglist = arglist->next->next; 307 args[count].arg = start->next; 308 args[count].n_normal = p->normal; 309 args[count].n_quoted = p->quoted; 310 args[count].n_str = p->str; 311 if (match_op(next, ')')) { 312 count++; 313 break; 314 } 315 start = next; 316 } 317 if (count == wanted && !match_op(next, ')')) 318 goto Emany; 319 if (count == wanted - 1) { 320 struct argcount *p = &arglist->next->count; 321 if (!p->vararg) 322 goto Efew; 323 args[count].arg = NULL; 324 args[count].n_normal = p->normal; 325 args[count].n_quoted = p->quoted; 326 args[count].n_str = p->str; 327 } 328 if (count < wanted - 1) 329 goto Efew; 330 } 331 what->next = next->next; 332 return 1; 333 334 Efew: 335 sparse_error(what->pos, "macro \"%s\" requires %d arguments, but only %d given", 336 show_token(what), wanted, count); 337 goto out; 338 Emany: 339 while (match_op(next, ',')) { 340 next = collect_arg(next, 0, &what->pos, 0); 341 count++; 342 } 343 if (eof_token(next)) 344 goto Eclosing; 345 sparse_error(what->pos, "macro \"%s\" passed %d arguments, but takes just %d", 346 show_token(what), count, wanted); 347 goto out; 348 Eclosing: 349 sparse_error(what->pos, "unterminated argument list invoking macro \"%s\"", 350 show_token(what)); 351 out: 352 what->next = next->next; 353 return 0; 354 } 355 356 static struct token *dup_list(struct token *list) 357 { 358 struct token *res = NULL; 359 struct token **p = &res; 360 361 while (!eof_token(list)) { 362 struct token *newtok = __alloc_token(0); 363 *newtok = *list; 364 *p = newtok; 365 p = &newtok->next; 366 list = list->next; 367 } 368 return res; 369 } 370 371 static const char *show_token_sequence(struct token *token, int quote) 372 { 373 static char buffer[MAX_STRING]; 374 char *ptr = buffer; 375 int whitespace = 0; 376 377 if (!token && !quote) 378 return "<none>"; 379 while (!eof_token(token)) { 380 const char *val = quote ? quote_token(token) : show_token(token); 381 int len = strlen(val); 382 383 if (ptr + whitespace + len >= buffer + sizeof(buffer)) { 384 sparse_error(token->pos, "too long token expansion"); 385 break; 386 } 387 388 if (whitespace) 389 *ptr++ = ' '; 390 memcpy(ptr, val, len); 391 ptr += len; 392 token = token->next; 393 whitespace = token->pos.whitespace; 394 } 395 *ptr = 0; 396 return buffer; 397 } 398 399 static struct token *stringify(struct token *arg) 400 { 401 const char *s = show_token_sequence(arg, 1); 402 int size = strlen(s)+1; 403 struct token *token = __alloc_token(0); 404 struct string *string = __alloc_string(size); 405 406 memcpy(string->data, s, size); 407 string->length = size; 408 token->pos = arg->pos; 409 token_type(token) = TOKEN_STRING; 410 token->string = string; 411 token->next = &eof_token_entry; 412 return token; 413 } 414 415 static void expand_arguments(int count, struct arg *args) 416 { 417 int i; 418 for (i = 0; i < count; i++) { 419 struct token *arg = args[i].arg; 420 if (!arg) 421 arg = &eof_token_entry; 422 if (args[i].n_str) 423 args[i].str = stringify(arg); 424 if (args[i].n_normal) { 425 if (!args[i].n_quoted) { 426 args[i].expanded = arg; 427 args[i].arg = NULL; 428 } else if (eof_token(arg)) { 429 args[i].expanded = arg; 430 } else { 431 args[i].expanded = dup_list(arg); 432 } 433 expand_list(&args[i].expanded); 434 } 435 } 436 } 437 438 /* 439 * Possibly valid combinations: 440 * - ident + ident -> ident 441 * - ident + number -> ident unless number contains '.', '+' or '-'. 442 * - 'L' + char constant -> wide char constant 443 * - 'L' + string literal -> wide string literal 444 * - number + number -> number 445 * - number + ident -> number 446 * - number + '.' -> number 447 * - number + '+' or '-' -> number, if number used to end on [eEpP]. 448 * - '.' + number -> number, if number used to start with a digit. 449 * - special + special -> either special or an error. 450 */ 451 static enum token_type combine(struct token *left, struct token *right, char *p) 452 { 453 int len; 454 enum token_type t1 = token_type(left), t2 = token_type(right); 455 456 if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL) 457 return TOKEN_ERROR; 458 459 if (t1 == TOKEN_IDENT && left->ident == &L_ident) { 460 if (t2 >= TOKEN_CHAR && t2 < TOKEN_WIDE_CHAR) 461 return t2 + TOKEN_WIDE_CHAR - TOKEN_CHAR; 462 if (t2 == TOKEN_STRING) 463 return TOKEN_WIDE_STRING; 464 } 465 466 if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL) 467 return TOKEN_ERROR; 468 469 strcpy(p, show_token(left)); 470 strcat(p, show_token(right)); 471 len = strlen(p); 472 473 if (len >= 256) 474 return TOKEN_ERROR; 475 476 if (t1 == TOKEN_IDENT) { 477 if (t2 == TOKEN_SPECIAL) 478 return TOKEN_ERROR; 479 if (t2 == TOKEN_NUMBER && strpbrk(p, "+-.")) 480 return TOKEN_ERROR; 481 return TOKEN_IDENT; 482 } 483 484 if (t1 == TOKEN_NUMBER) { 485 if (t2 == TOKEN_SPECIAL) { 486 switch (right->special) { 487 case '.': 488 break; 489 case '+': case '-': 490 if (strchr("eEpP", p[len - 2])) 491 break; 492 default: 493 return TOKEN_ERROR; 494 } 495 } 496 return TOKEN_NUMBER; 497 } 498 499 if (p[0] == '.' && isdigit((unsigned char)p[1])) 500 return TOKEN_NUMBER; 501 502 return TOKEN_SPECIAL; 503 } 504 505 static int merge(struct token *left, struct token *right) 506 { 507 static char buffer[512]; 508 enum token_type res = combine(left, right, buffer); 509 int n; 510 511 switch (res) { 512 case TOKEN_IDENT: 513 left->ident = built_in_ident(buffer); 514 left->pos.noexpand = 0; 515 return 1; 516 517 case TOKEN_NUMBER: { 518 char *number = __alloc_bytes(strlen(buffer) + 1); 519 memcpy(number, buffer, strlen(buffer) + 1); 520 token_type(left) = TOKEN_NUMBER; /* could be . + num */ 521 left->number = number; 522 return 1; 523 } 524 525 case TOKEN_SPECIAL: 526 if (buffer[2] && buffer[3]) 527 break; 528 for (n = SPECIAL_BASE; n < SPECIAL_ARG_SEPARATOR; n++) { 529 if (!memcmp(buffer, combinations[n-SPECIAL_BASE], 3)) { 530 left->special = n; 531 return 1; 532 } 533 } 534 break; 535 536 case TOKEN_WIDE_CHAR: 537 case TOKEN_WIDE_STRING: 538 token_type(left) = res; 539 left->pos.noexpand = 0; 540 left->string = right->string; 541 return 1; 542 543 case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3: 544 token_type(left) = res; 545 left->pos.noexpand = 0; 546 memcpy(left->embedded, right->embedded, 4); 547 return 1; 548 549 default: 550 ; 551 } 552 sparse_error(left->pos, "'##' failed: concatenation is not a valid token"); 553 return 0; 554 } 555 556 static struct token *dup_token(struct token *token, struct position *streampos) 557 { 558 struct token *alloc = alloc_token(streampos); 559 token_type(alloc) = token_type(token); 560 alloc->pos.newline = token->pos.newline; 561 alloc->pos.whitespace = token->pos.whitespace; 562 alloc->number = token->number; 563 alloc->pos.noexpand = token->pos.noexpand; 564 return alloc; 565 } 566 567 static struct token **copy(struct token **where, struct token *list, int *count) 568 { 569 int need_copy = --*count; 570 while (!eof_token(list)) { 571 struct token *token; 572 if (need_copy) 573 token = dup_token(list, &list->pos); 574 else 575 token = list; 576 if (token_type(token) == TOKEN_IDENT && token->ident->tainted) 577 token->pos.noexpand = 1; 578 *where = token; 579 where = &token->next; 580 list = list->next; 581 } 582 *where = &eof_token_entry; 583 return where; 584 } 585 586 static int handle_kludge(struct token **p, struct arg *args) 587 { 588 struct token *t = (*p)->next->next; 589 while (1) { 590 struct arg *v = &args[t->argnum]; 591 if (token_type(t->next) != TOKEN_CONCAT) { 592 if (v->arg) { 593 /* ignore the first ## */ 594 *p = (*p)->next; 595 return 0; 596 } 597 /* skip the entire thing */ 598 *p = t; 599 return 1; 600 } 601 if (v->arg && !eof_token(v->arg)) 602 return 0; /* no magic */ 603 t = t->next->next; 604 } 605 } 606 607 static struct token **substitute(struct token **list, struct token *body, struct arg *args) 608 { 609 struct position *base_pos = &(*list)->pos; 610 int *count; 611 enum {Normal, Placeholder, Concat} state = Normal; 612 613 for (; !eof_token(body); body = body->next) { 614 struct token *added, *arg; 615 struct token **tail; 616 struct token *t; 617 618 switch (token_type(body)) { 619 case TOKEN_GNU_KLUDGE: 620 /* 621 * GNU kludge: if we had <comma>##<vararg>, behaviour 622 * depends on whether we had enough arguments to have 623 * a vararg. If we did, ## is just ignored. Otherwise 624 * both , and ## are ignored. Worse, there can be 625 * an arbitrary number of ##<arg> in between; if all of 626 * those are empty, we act as if they hadn't been there, 627 * otherwise we act as if the kludge didn't exist. 628 */ 629 t = body; 630 if (handle_kludge(&body, args)) { 631 if (state == Concat) 632 state = Normal; 633 else 634 state = Placeholder; 635 continue; 636 } 637 added = dup_token(t, base_pos); 638 token_type(added) = TOKEN_SPECIAL; 639 tail = &added->next; 640 break; 641 642 case TOKEN_STR_ARGUMENT: 643 arg = args[body->argnum].str; 644 count = &args[body->argnum].n_str; 645 goto copy_arg; 646 647 case TOKEN_QUOTED_ARGUMENT: 648 arg = args[body->argnum].arg; 649 count = &args[body->argnum].n_quoted; 650 if (!arg || eof_token(arg)) { 651 if (state == Concat) 652 state = Normal; 653 else 654 state = Placeholder; 655 continue; 656 } 657 goto copy_arg; 658 659 case TOKEN_MACRO_ARGUMENT: 660 arg = args[body->argnum].expanded; 661 count = &args[body->argnum].n_normal; 662 if (eof_token(arg)) { 663 state = Normal; 664 continue; 665 } 666 copy_arg: 667 tail = copy(&added, arg, count); 668 added->pos.newline = body->pos.newline; 669 added->pos.whitespace = body->pos.whitespace; 670 break; 671 672 case TOKEN_CONCAT: 673 if (state == Placeholder) 674 state = Normal; 675 else 676 state = Concat; 677 continue; 678 679 case TOKEN_IDENT: 680 added = dup_token(body, base_pos); 681 if (added->ident->tainted) 682 added->pos.noexpand = 1; 683 tail = &added->next; 684 break; 685 686 default: 687 added = dup_token(body, base_pos); 688 tail = &added->next; 689 break; 690 } 691 692 /* 693 * if we got to doing real concatenation, we already have 694 * added something into the list, so containing_token() is OK. 695 */ 696 if (state == Concat && merge(containing_token(list), added)) { 697 *list = added->next; 698 if (tail != &added->next) 699 list = tail; 700 } else { 701 *list = added; 702 list = tail; 703 } 704 state = Normal; 705 } 706 *list = &eof_token_entry; 707 return list; 708 } 709 710 static int expand(struct token **list, struct symbol *sym) 711 { 712 struct token *last; 713 struct token *token = *list; 714 struct ident *expanding = token->ident; 715 struct token **tail; 716 int nargs = sym->arglist ? sym->arglist->count.normal : 0; 717 struct arg args[nargs]; 718 719 if (expanding->tainted) { 720 token->pos.noexpand = 1; 721 return 1; 722 } 723 724 if (sym->arglist) { 725 if (!match_op(scan_next(&token->next), '(')) 726 return 1; 727 if (!collect_arguments(token->next, sym->arglist, args, token)) 728 return 1; 729 expand_arguments(nargs, args); 730 } 731 732 expanding->tainted = 1; 733 734 last = token->next; 735 tail = substitute(list, sym->expansion, args); 736 /* 737 * Note that it won't be eof - at least TOKEN_UNTAINT will be there. 738 * We still can lose the newline flag if the sucker expands to nothing, 739 * but the price of dealing with that is probably too high (we'd need 740 * to collect the flags during scan_next()) 741 */ 742 (*list)->pos.newline = token->pos.newline; 743 (*list)->pos.whitespace = token->pos.whitespace; 744 *tail = last; 745 746 return 0; 747 } 748 749 static const char *token_name_sequence(struct token *token, int endop, struct token *start) 750 { 751 static char buffer[256]; 752 char *ptr = buffer; 753 754 while (!eof_token(token) && !match_op(token, endop)) { 755 int len; 756 const char *val = token->string->data; 757 if (token_type(token) != TOKEN_STRING) 758 val = show_token(token); 759 len = strlen(val); 760 memcpy(ptr, val, len); 761 ptr += len; 762 token = token->next; 763 } 764 *ptr = 0; 765 if (endop && !match_op(token, endop)) 766 sparse_error(start->pos, "expected '>' at end of filename"); 767 return buffer; 768 } 769 770 static int already_tokenized(const char *path) 771 { 772 int stream, next; 773 774 for (stream = *hash_stream(path); stream >= 0 ; stream = next) { 775 struct stream *s = input_streams + stream; 776 777 next = s->next_stream; 778 if (s->once) { 779 if (strcmp(path, s->name)) 780 continue; 781 return 1; 782 } 783 if (s->constant != CONSTANT_FILE_YES) 784 continue; 785 if (strcmp(path, s->name)) 786 continue; 787 if (s->protect && !lookup_macro(s->protect)) 788 continue; 789 return 1; 790 } 791 return 0; 792 } 793 794 /* Handle include of header files. 795 * The relevant options are made compatible with gcc. The only options that 796 * are not supported is -withprefix and friends. 797 * 798 * Three set of include paths are known: 799 * quote_includepath: Path to search when using #include "file.h" 800 * angle_includepath: Paths to search when using #include <file.h> 801 * isys_includepath: Paths specified with -isystem, come before the 802 * built-in system include paths. Gcc would suppress 803 * warnings from system headers. Here we separate 804 * them from the angle_ ones to keep search ordering. 805 * 806 * sys_includepath: Built-in include paths. 807 * dirafter_includepath Paths added with -dirafter. 808 * 809 * The above is implemented as one array with pointers 810 * +--------------+ 811 * quote_includepath ---> | | 812 * +--------------+ 813 * | | 814 * +--------------+ 815 * angle_includepath ---> | | 816 * +--------------+ 817 * isys_includepath ---> | | 818 * +--------------+ 819 * sys_includepath ---> | | 820 * +--------------+ 821 * dirafter_includepath -> | | 822 * +--------------+ 823 * 824 * -I dir insert dir just before isys_includepath and move the rest 825 * -I- makes all dirs specified with -I before to quote dirs only and 826 * angle_includepath is set equal to isys_includepath. 827 * -nostdinc removes all sys dirs by storing NULL in entry pointed 828 * to by * sys_includepath. Note that this will reset all dirs built-in 829 * and added before -nostdinc by -isystem and -idirafter. 830 * -isystem dir adds dir where isys_includepath points adding this dir as 831 * first systemdir 832 * -idirafter dir adds dir to the end of the list 833 */ 834 835 static void set_stream_include_path(struct stream *stream) 836 { 837 const char *path = stream->path; 838 if (!path) { 839 const char *p = strrchr(stream->name, '/'); 840 path = ""; 841 if (p) { 842 int len = p - stream->name + 1; 843 char *m = malloc(len+1); 844 /* This includes the final "/" */ 845 memcpy(m, stream->name, len); 846 m[len] = 0; 847 path = m; 848 } 849 stream->path = path; 850 } 851 includepath[0] = path; 852 } 853 854 static int try_include(const char *path, const char *filename, int flen, struct token **where, const char **next_path) 855 { 856 int fd; 857 int plen = strlen(path); 858 static char fullname[PATH_MAX]; 859 860 memcpy(fullname, path, plen); 861 if (plen && path[plen-1] != '/') { 862 fullname[plen] = '/'; 863 plen++; 864 } 865 memcpy(fullname+plen, filename, flen); 866 if (already_tokenized(fullname)) 867 return 1; 868 fd = open(fullname, O_RDONLY); 869 if (fd >= 0) { 870 char * streamname = __alloc_bytes(plen + flen); 871 memcpy(streamname, fullname, plen + flen); 872 *where = tokenize(streamname, fd, *where, next_path); 873 close(fd); 874 return 1; 875 } 876 return 0; 877 } 878 879 static int do_include_path(const char **pptr, struct token **list, struct token *token, const char *filename, int flen) 880 { 881 const char *path; 882 883 while ((path = *pptr++) != NULL) { 884 if (!try_include(path, filename, flen, list, pptr)) 885 continue; 886 return 1; 887 } 888 return 0; 889 } 890 891 static int free_preprocessor_line(struct token *token) 892 { 893 while (token_type(token) != TOKEN_EOF) { 894 struct token *free = token; 895 token = token->next; 896 __free_token(free); 897 }; 898 return 1; 899 } 900 901 const char *find_include(const char *skip, const char *look_for) 902 { 903 DIR *dp; 904 struct dirent *entry; 905 struct stat statbuf; 906 const char *ret; 907 char cwd[PATH_MAX]; 908 static char buf[PATH_MAX + 1]; 909 910 dp = opendir("."); 911 if (!dp) 912 return NULL; 913 914 if (!getcwd(cwd, sizeof(cwd))) 915 return NULL; 916 917 while ((entry = readdir(dp))) { 918 lstat(entry->d_name, &statbuf); 919 920 if (strcmp(entry->d_name, look_for) == 0) { 921 snprintf(buf, sizeof(buf), "%s/%s", cwd, entry->d_name); 922 return buf; 923 } 924 925 if (S_ISDIR(statbuf.st_mode)) { 926 /* Found a directory, but ignore . and .. */ 927 if (strcmp(".", entry->d_name) == 0 || 928 strcmp("..", entry->d_name) == 0 || 929 strcmp(skip, entry->d_name) == 0) 930 continue; 931 932 chdir(entry->d_name); 933 ret = find_include("", look_for); 934 chdir(".."); 935 if (ret) 936 return ret; 937 } 938 } 939 closedir(dp); 940 941 return NULL; 942 } 943 944 const char *search_dir(const char *stop, const char *look_for) 945 { 946 char cwd[PATH_MAX]; 947 int len; 948 const char *ret; 949 int cnt = 0; 950 951 if (!getcwd(cwd, sizeof(cwd))) 952 return NULL; 953 954 len = strlen(cwd); 955 while (len >= 0) { 956 ret = find_include(cnt++ ? cwd + len + 1 : "", look_for); 957 if (ret) 958 return ret; 959 960 if (strcmp(cwd, stop) == 0 || 961 strcmp(cwd, "/usr/include") == 0 || 962 strcmp(cwd, "/usr/local/include") == 0 || 963 strlen(cwd) <= 10 || /* heck... don't search /usr/lib/ */ 964 strcmp(cwd, "/") == 0) 965 return NULL; 966 967 while (--len >= 0) { 968 if (cwd[len] == '/') { 969 cwd[len] = '\0'; 970 break; 971 } 972 } 973 974 chdir(".."); 975 } 976 return NULL; 977 } 978 979 static void use_best_guess_header_file(struct token *token, const char *filename, struct token **list) 980 { 981 char cwd[PATH_MAX]; 982 char dir_part[PATH_MAX]; 983 const char *file_part; 984 const char *include_name; 985 int len; 986 987 if (!filename || filename[0] == '\0') 988 return; 989 990 file_part = filename; 991 while ((filename = strchr(filename, '/'))) { 992 ++filename; 993 if (filename[0]) 994 file_part = filename; 995 } 996 997 snprintf(dir_part, sizeof(dir_part), "%s", stream_name(token->pos.stream)); 998 len = strlen(dir_part); 999 while (--len >= 0) { 1000 if (dir_part[len] == '/') { 1001 dir_part[len] = '\0'; 1002 break; 1003 } 1004 } 1005 if (len < 0) 1006 sprintf(dir_part, "."); 1007 1008 if (!getcwd(cwd, sizeof(cwd))) 1009 return; 1010 1011 chdir(dir_part); 1012 include_name = search_dir(cwd, file_part); 1013 chdir(cwd); 1014 if (!include_name) 1015 return; 1016 sparse_error(token->pos, "using '%s'", include_name); 1017 1018 try_include("", include_name, strlen(include_name), list, includepath); 1019 } 1020 1021 static int handle_include_path(struct stream *stream, struct token **list, struct token *token, int how) 1022 { 1023 const char *filename; 1024 struct token *next; 1025 const char **path; 1026 int expect; 1027 int flen; 1028 1029 next = token->next; 1030 expect = '>'; 1031 if (!match_op(next, '<')) { 1032 expand_list(&token->next); 1033 expect = 0; 1034 next = token; 1035 if (match_op(token->next, '<')) { 1036 next = token->next; 1037 expect = '>'; 1038 } 1039 } 1040 1041 token = next->next; 1042 filename = token_name_sequence(token, expect, token); 1043 flen = strlen(filename) + 1; 1044 1045 /* Absolute path? */ 1046 if (filename[0] == '/') { 1047 if (try_include("", filename, flen, list, includepath)) 1048 return 0; 1049 goto out; 1050 } 1051 1052 switch (how) { 1053 case 1: 1054 path = stream->next_path; 1055 break; 1056 case 2: 1057 includepath[0] = ""; 1058 path = includepath; 1059 break; 1060 default: 1061 /* Dir of input file is first dir to search for quoted includes */ 1062 set_stream_include_path(stream); 1063 path = expect ? angle_includepath : quote_includepath; 1064 break; 1065 } 1066 /* Check the standard include paths.. */ 1067 if (do_include_path(path, list, token, filename, flen)) 1068 return 0; 1069 out: 1070 sparse_error(token->pos, "unable to open '%s'", filename); 1071 use_best_guess_header_file(token, filename, list); 1072 return 0; 1073 } 1074 1075 static int handle_include(struct stream *stream, struct token **list, struct token *token) 1076 { 1077 return handle_include_path(stream, list, token, 0); 1078 } 1079 1080 static int handle_include_next(struct stream *stream, struct token **list, struct token *token) 1081 { 1082 return handle_include_path(stream, list, token, 1); 1083 } 1084 1085 static int handle_argv_include(struct stream *stream, struct token **list, struct token *token) 1086 { 1087 return handle_include_path(stream, list, token, 2); 1088 } 1089 1090 static int token_different(struct token *t1, struct token *t2) 1091 { 1092 int different; 1093 1094 if (token_type(t1) != token_type(t2)) 1095 return 1; 1096 1097 switch (token_type(t1)) { 1098 case TOKEN_IDENT: 1099 different = t1->ident != t2->ident; 1100 break; 1101 case TOKEN_ARG_COUNT: 1102 case TOKEN_UNTAINT: 1103 case TOKEN_CONCAT: 1104 case TOKEN_GNU_KLUDGE: 1105 different = 0; 1106 break; 1107 case TOKEN_NUMBER: 1108 different = strcmp(t1->number, t2->number); 1109 break; 1110 case TOKEN_SPECIAL: 1111 different = t1->special != t2->special; 1112 break; 1113 case TOKEN_MACRO_ARGUMENT: 1114 case TOKEN_QUOTED_ARGUMENT: 1115 case TOKEN_STR_ARGUMENT: 1116 different = t1->argnum != t2->argnum; 1117 break; 1118 case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3: 1119 case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3: 1120 different = memcmp(t1->embedded, t2->embedded, 4); 1121 break; 1122 case TOKEN_CHAR: 1123 case TOKEN_WIDE_CHAR: 1124 case TOKEN_STRING: 1125 case TOKEN_WIDE_STRING: { 1126 struct string *s1, *s2; 1127 1128 s1 = t1->string; 1129 s2 = t2->string; 1130 different = 1; 1131 if (s1->length != s2->length) 1132 break; 1133 different = memcmp(s1->data, s2->data, s1->length); 1134 break; 1135 } 1136 default: 1137 different = 1; 1138 break; 1139 } 1140 return different; 1141 } 1142 1143 static int token_list_different(struct token *list1, struct token *list2) 1144 { 1145 for (;;) { 1146 if (list1 == list2) 1147 return 0; 1148 if (!list1 || !list2) 1149 return 1; 1150 if (token_different(list1, list2)) 1151 return 1; 1152 list1 = list1->next; 1153 list2 = list2->next; 1154 } 1155 } 1156 1157 static inline void set_arg_count(struct token *token) 1158 { 1159 token_type(token) = TOKEN_ARG_COUNT; 1160 token->count.normal = token->count.quoted = 1161 token->count.str = token->count.vararg = 0; 1162 } 1163 1164 static struct token *parse_arguments(struct token *list) 1165 { 1166 struct token *arg = list->next, *next = list; 1167 struct argcount *count = &list->count; 1168 1169 set_arg_count(list); 1170 1171 if (match_op(arg, ')')) { 1172 next = arg->next; 1173 list->next = &eof_token_entry; 1174 return next; 1175 } 1176 1177 while (token_type(arg) == TOKEN_IDENT) { 1178 if (arg->ident == &__VA_ARGS___ident) 1179 goto Eva_args; 1180 if (!++count->normal) 1181 goto Eargs; 1182 next = arg->next; 1183 1184 if (match_op(next, ',')) { 1185 set_arg_count(next); 1186 arg = next->next; 1187 continue; 1188 } 1189 1190 if (match_op(next, ')')) { 1191 set_arg_count(next); 1192 next = next->next; 1193 arg->next->next = &eof_token_entry; 1194 return next; 1195 } 1196 1197 /* normal cases are finished here */ 1198 1199 if (match_op(next, SPECIAL_ELLIPSIS)) { 1200 if (match_op(next->next, ')')) { 1201 set_arg_count(next); 1202 next->count.vararg = 1; 1203 next = next->next; 1204 arg->next->next = &eof_token_entry; 1205 return next->next; 1206 } 1207 1208 arg = next; 1209 goto Enotclosed; 1210 } 1211 1212 if (eof_token(next)) { 1213 goto Enotclosed; 1214 } else { 1215 arg = next; 1216 goto Ebadstuff; 1217 } 1218 } 1219 1220 if (match_op(arg, SPECIAL_ELLIPSIS)) { 1221 next = arg->next; 1222 token_type(arg) = TOKEN_IDENT; 1223 arg->ident = &__VA_ARGS___ident; 1224 if (!match_op(next, ')')) 1225 goto Enotclosed; 1226 if (!++count->normal) 1227 goto Eargs; 1228 set_arg_count(next); 1229 next->count.vararg = 1; 1230 next = next->next; 1231 arg->next->next = &eof_token_entry; 1232 return next; 1233 } 1234 1235 if (eof_token(arg)) { 1236 arg = next; 1237 goto Enotclosed; 1238 } 1239 if (match_op(arg, ',')) 1240 goto Emissing; 1241 else 1242 goto Ebadstuff; 1243 1244 1245 Emissing: 1246 sparse_error(arg->pos, "parameter name missing"); 1247 return NULL; 1248 Ebadstuff: 1249 sparse_error(arg->pos, "\"%s\" may not appear in macro parameter list", 1250 show_token(arg)); 1251 return NULL; 1252 Enotclosed: 1253 sparse_error(arg->pos, "missing ')' in macro parameter list"); 1254 return NULL; 1255 Eva_args: 1256 sparse_error(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro"); 1257 return NULL; 1258 Eargs: 1259 sparse_error(arg->pos, "too many arguments in macro definition"); 1260 return NULL; 1261 } 1262 1263 static int try_arg(struct token *token, enum token_type type, struct token *arglist) 1264 { 1265 struct ident *ident = token->ident; 1266 int nr; 1267 1268 if (!arglist || token_type(token) != TOKEN_IDENT) 1269 return 0; 1270 1271 arglist = arglist->next; 1272 1273 for (nr = 0; !eof_token(arglist); nr++, arglist = arglist->next->next) { 1274 if (arglist->ident == ident) { 1275 struct argcount *count = &arglist->next->count; 1276 int n; 1277 1278 token->argnum = nr; 1279 token_type(token) = type; 1280 switch (type) { 1281 case TOKEN_MACRO_ARGUMENT: 1282 n = ++count->normal; 1283 break; 1284 case TOKEN_QUOTED_ARGUMENT: 1285 n = ++count->quoted; 1286 break; 1287 default: 1288 n = ++count->str; 1289 } 1290 if (n) 1291 return count->vararg ? 2 : 1; 1292 /* 1293 * XXX - need saner handling of that 1294 * (>= 1024 instances of argument) 1295 */ 1296 token_type(token) = TOKEN_ERROR; 1297 return -1; 1298 } 1299 } 1300 return 0; 1301 } 1302 1303 static struct token *handle_hash(struct token **p, struct token *arglist) 1304 { 1305 struct token *token = *p; 1306 if (arglist) { 1307 struct token *next = token->next; 1308 if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist)) 1309 goto Equote; 1310 next->pos.whitespace = token->pos.whitespace; 1311 __free_token(token); 1312 token = *p = next; 1313 } else { 1314 token->pos.noexpand = 1; 1315 } 1316 return token; 1317 1318 Equote: 1319 sparse_error(token->pos, "'#' is not followed by a macro parameter"); 1320 return NULL; 1321 } 1322 1323 /* token->next is ## */ 1324 static struct token *handle_hashhash(struct token *token, struct token *arglist) 1325 { 1326 struct token *last = token; 1327 struct token *concat; 1328 int state = match_op(token, ','); 1329 1330 try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist); 1331 1332 while (1) { 1333 struct token *t; 1334 int is_arg; 1335 1336 /* eat duplicate ## */ 1337 concat = token->next; 1338 while (match_op(t = concat->next, SPECIAL_HASHHASH)) { 1339 token->next = t; 1340 __free_token(concat); 1341 concat = t; 1342 } 1343 token_type(concat) = TOKEN_CONCAT; 1344 1345 if (eof_token(t)) 1346 goto Econcat; 1347 1348 if (match_op(t, '#')) { 1349 t = handle_hash(&concat->next, arglist); 1350 if (!t) 1351 return NULL; 1352 } 1353 1354 is_arg = try_arg(t, TOKEN_QUOTED_ARGUMENT, arglist); 1355 1356 if (state == 1 && is_arg) { 1357 state = is_arg; 1358 } else { 1359 last = t; 1360 state = match_op(t, ','); 1361 } 1362 1363 token = t; 1364 if (!match_op(token->next, SPECIAL_HASHHASH)) 1365 break; 1366 } 1367 /* handle GNU ,##__VA_ARGS__ kludge, in all its weirdness */ 1368 if (state == 2) 1369 token_type(last) = TOKEN_GNU_KLUDGE; 1370 return token; 1371 1372 Econcat: 1373 sparse_error(concat->pos, "'##' cannot appear at the ends of macro expansion"); 1374 return NULL; 1375 } 1376 1377 static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name) 1378 { 1379 struct token *token = expansion; 1380 struct token **p; 1381 1382 if (match_op(token, SPECIAL_HASHHASH)) 1383 goto Econcat; 1384 1385 for (p = &expansion; !eof_token(token); p = &token->next, token = *p) { 1386 if (match_op(token, '#')) { 1387 token = handle_hash(p, arglist); 1388 if (!token) 1389 return NULL; 1390 } 1391 if (match_op(token->next, SPECIAL_HASHHASH)) { 1392 token = handle_hashhash(token, arglist); 1393 if (!token) 1394 return NULL; 1395 } else { 1396 try_arg(token, TOKEN_MACRO_ARGUMENT, arglist); 1397 } 1398 switch (token_type(token)) { 1399 case TOKEN_ERROR: 1400 goto Earg; 1401 1402 case TOKEN_STRING: 1403 case TOKEN_WIDE_STRING: 1404 token->string->immutable = 1; 1405 break; 1406 } 1407 } 1408 token = alloc_token(&expansion->pos); 1409 token_type(token) = TOKEN_UNTAINT; 1410 token->ident = name; 1411 token->next = *p; 1412 *p = token; 1413 return expansion; 1414 1415 Econcat: 1416 sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion"); 1417 return NULL; 1418 Earg: 1419 sparse_error(token->pos, "too many instances of argument in body"); 1420 return NULL; 1421 } 1422 1423 static int do_handle_define(struct stream *stream, struct token **line, struct token *token, int attr) 1424 { 1425 struct token *arglist, *expansion; 1426 struct token *left = token->next; 1427 struct symbol *sym; 1428 struct ident *name; 1429 int ret; 1430 1431 if (token_type(left) != TOKEN_IDENT) { 1432 sparse_error(token->pos, "expected identifier to 'define'"); 1433 return 1; 1434 } 1435 1436 name = left->ident; 1437 1438 arglist = NULL; 1439 expansion = left->next; 1440 if (!expansion->pos.whitespace) { 1441 if (match_op(expansion, '(')) { 1442 arglist = expansion; 1443 expansion = parse_arguments(expansion); 1444 if (!expansion) 1445 return 1; 1446 } else if (!eof_token(expansion)) { 1447 warning(expansion->pos, 1448 "no whitespace before object-like macro body"); 1449 } 1450 } 1451 1452 expansion = parse_expansion(expansion, arglist, name); 1453 if (!expansion) 1454 return 1; 1455 1456 ret = 1; 1457 sym = lookup_symbol(name, NS_MACRO | NS_UNDEF); 1458 if (sym) { 1459 int clean; 1460 1461 if (attr < sym->attr) 1462 goto out; 1463 1464 clean = (attr == sym->attr && sym->namespace == NS_MACRO); 1465 1466 if (token_list_different(sym->expansion, expansion) || 1467 token_list_different(sym->arglist, arglist)) { 1468 ret = 0; 1469 if ((clean && attr == SYM_ATTR_NORMAL) 1470 || sym->used_in == file_scope) { 1471 warning(left->pos, "preprocessor token %.*s redefined", 1472 name->len, name->name); 1473 info(sym->pos, "this was the original definition"); 1474 } 1475 } else if (clean) 1476 goto out; 1477 } 1478 1479 if (!sym || sym->scope != file_scope) { 1480 sym = alloc_symbol(left->pos, SYM_NODE); 1481 bind_symbol(sym, name, NS_MACRO); 1482 add_ident(¯os, name); 1483 ret = 0; 1484 } 1485 1486 if (!ret) { 1487 sym->expansion = expansion; 1488 sym->arglist = arglist; 1489 __free_token(token); /* Free the "define" token, but not the rest of the line */ 1490 } 1491 1492 sym->namespace = NS_MACRO; 1493 sym->used_in = NULL; 1494 sym->attr = attr; 1495 out: 1496 return ret; 1497 } 1498 1499 static int handle_define(struct stream *stream, struct token **line, struct token *token) 1500 { 1501 return do_handle_define(stream, line, token, SYM_ATTR_NORMAL); 1502 } 1503 1504 static int handle_weak_define(struct stream *stream, struct token **line, struct token *token) 1505 { 1506 return do_handle_define(stream, line, token, SYM_ATTR_WEAK); 1507 } 1508 1509 static int handle_strong_define(struct stream *stream, struct token **line, struct token *token) 1510 { 1511 return do_handle_define(stream, line, token, SYM_ATTR_STRONG); 1512 } 1513 1514 static int do_handle_undef(struct stream *stream, struct token **line, struct token *token, int attr) 1515 { 1516 struct token *left = token->next; 1517 struct symbol *sym; 1518 1519 if (token_type(left) != TOKEN_IDENT) { 1520 sparse_error(token->pos, "expected identifier to 'undef'"); 1521 return 1; 1522 } 1523 1524 sym = lookup_symbol(left->ident, NS_MACRO | NS_UNDEF); 1525 if (sym) { 1526 if (attr < sym->attr) 1527 return 1; 1528 if (attr == sym->attr && sym->namespace == NS_UNDEF) 1529 return 1; 1530 } else if (attr <= SYM_ATTR_NORMAL) 1531 return 1; 1532 1533 if (!sym || sym->scope != file_scope) { 1534 sym = alloc_symbol(left->pos, SYM_NODE); 1535 bind_symbol(sym, left->ident, NS_MACRO); 1536 } 1537 1538 sym->namespace = NS_UNDEF; 1539 sym->used_in = NULL; 1540 sym->attr = attr; 1541 1542 return 1; 1543 } 1544 1545 static int handle_undef(struct stream *stream, struct token **line, struct token *token) 1546 { 1547 return do_handle_undef(stream, line, token, SYM_ATTR_NORMAL); 1548 } 1549 1550 static int handle_strong_undef(struct stream *stream, struct token **line, struct token *token) 1551 { 1552 return do_handle_undef(stream, line, token, SYM_ATTR_STRONG); 1553 } 1554 1555 static int preprocessor_if(struct stream *stream, struct token *token, int true) 1556 { 1557 token_type(token) = false_nesting ? TOKEN_SKIP_GROUPS : TOKEN_IF; 1558 free_preprocessor_line(token->next); 1559 token->next = stream->top_if; 1560 stream->top_if = token; 1561 if (false_nesting || true != 1) 1562 false_nesting++; 1563 return 0; 1564 } 1565 1566 static int handle_ifdef(struct stream *stream, struct token **line, struct token *token) 1567 { 1568 struct token *next = token->next; 1569 int arg; 1570 if (token_type(next) == TOKEN_IDENT) { 1571 arg = token_defined(next); 1572 } else { 1573 dirty_stream(stream); 1574 if (!false_nesting) 1575 sparse_error(token->pos, "expected preprocessor identifier"); 1576 arg = -1; 1577 } 1578 return preprocessor_if(stream, token, arg); 1579 } 1580 1581 static int handle_ifndef(struct stream *stream, struct token **line, struct token *token) 1582 { 1583 struct token *next = token->next; 1584 int arg; 1585 if (token_type(next) == TOKEN_IDENT) { 1586 if (!stream->dirty && !stream->ifndef) { 1587 if (!stream->protect) { 1588 stream->ifndef = token; 1589 stream->protect = next->ident; 1590 } else if (stream->protect == next->ident) { 1591 stream->ifndef = token; 1592 stream->dirty = 1; 1593 } 1594 } 1595 arg = !token_defined(next); 1596 } else { 1597 dirty_stream(stream); 1598 if (!false_nesting) 1599 sparse_error(token->pos, "expected preprocessor identifier"); 1600 arg = -1; 1601 } 1602 1603 return preprocessor_if(stream, token, arg); 1604 } 1605 1606 static const char *show_token_sequence(struct token *token, int quote); 1607 1608 /* 1609 * Expression handling for #if and #elif; it differs from normal expansion 1610 * due to special treatment of "defined". 1611 */ 1612 static int expression_value(struct token **where) 1613 { 1614 struct expression *expr; 1615 struct token *p; 1616 struct token **list = where, **beginning = NULL; 1617 long long value; 1618 int state = 0; 1619 1620 while (!eof_token(p = scan_next(list))) { 1621 switch (state) { 1622 case 0: 1623 if (token_type(p) != TOKEN_IDENT) 1624 break; 1625 if (p->ident == &defined_ident) { 1626 state = 1; 1627 beginning = list; 1628 break; 1629 } 1630 if (!expand_one_symbol(list)) 1631 continue; 1632 if (token_type(p) != TOKEN_IDENT) 1633 break; 1634 token_type(p) = TOKEN_ZERO_IDENT; 1635 break; 1636 case 1: 1637 if (match_op(p, '(')) { 1638 state = 2; 1639 } else { 1640 state = 0; 1641 replace_with_defined(p); 1642 *beginning = p; 1643 } 1644 break; 1645 case 2: 1646 if (token_type(p) == TOKEN_IDENT) 1647 state = 3; 1648 else 1649 state = 0; 1650 replace_with_defined(p); 1651 *beginning = p; 1652 break; 1653 case 3: 1654 state = 0; 1655 if (!match_op(p, ')')) 1656 sparse_error(p->pos, "missing ')' after \"defined\""); 1657 *list = p->next; 1658 continue; 1659 } 1660 list = &p->next; 1661 } 1662 1663 p = constant_expression(*where, &expr); 1664 if (!eof_token(p)) 1665 sparse_error(p->pos, "garbage at end: %s", show_token_sequence(p, 0)); 1666 value = get_expression_value(expr); 1667 return value != 0; 1668 } 1669 1670 static int handle_if(struct stream *stream, struct token **line, struct token *token) 1671 { 1672 int value = 0; 1673 if (!false_nesting) 1674 value = expression_value(&token->next); 1675 1676 dirty_stream(stream); 1677 return preprocessor_if(stream, token, value); 1678 } 1679 1680 static int handle_elif(struct stream * stream, struct token **line, struct token *token) 1681 { 1682 struct token *top_if = stream->top_if; 1683 end_group(stream); 1684 1685 if (!top_if) { 1686 nesting_error(stream); 1687 sparse_error(token->pos, "unmatched #elif within stream"); 1688 return 1; 1689 } 1690 1691 if (token_type(top_if) == TOKEN_ELSE) { 1692 nesting_error(stream); 1693 sparse_error(token->pos, "#elif after #else"); 1694 if (!false_nesting) 1695 false_nesting = 1; 1696 return 1; 1697 } 1698 1699 dirty_stream(stream); 1700 if (token_type(top_if) != TOKEN_IF) 1701 return 1; 1702 if (false_nesting) { 1703 false_nesting = 0; 1704 if (!expression_value(&token->next)) 1705 false_nesting = 1; 1706 } else { 1707 false_nesting = 1; 1708 token_type(top_if) = TOKEN_SKIP_GROUPS; 1709 } 1710 return 1; 1711 } 1712 1713 static int handle_else(struct stream *stream, struct token **line, struct token *token) 1714 { 1715 struct token *top_if = stream->top_if; 1716 end_group(stream); 1717 1718 if (!top_if) { 1719 nesting_error(stream); 1720 sparse_error(token->pos, "unmatched #else within stream"); 1721 return 1; 1722 } 1723 1724 if (token_type(top_if) == TOKEN_ELSE) { 1725 nesting_error(stream); 1726 sparse_error(token->pos, "#else after #else"); 1727 } 1728 if (false_nesting) { 1729 if (token_type(top_if) == TOKEN_IF) 1730 false_nesting = 0; 1731 } else { 1732 false_nesting = 1; 1733 } 1734 token_type(top_if) = TOKEN_ELSE; 1735 return 1; 1736 } 1737 1738 static int handle_endif(struct stream *stream, struct token **line, struct token *token) 1739 { 1740 struct token *top_if = stream->top_if; 1741 end_group(stream); 1742 if (!top_if) { 1743 nesting_error(stream); 1744 sparse_error(token->pos, "unmatched #endif in stream"); 1745 return 1; 1746 } 1747 if (false_nesting) 1748 false_nesting--; 1749 stream->top_if = top_if->next; 1750 __free_token(top_if); 1751 return 1; 1752 } 1753 1754 static int handle_warning(struct stream *stream, struct token **line, struct token *token) 1755 { 1756 warning(token->pos, "%s", show_token_sequence(token->next, 0)); 1757 return 1; 1758 } 1759 1760 static int handle_error(struct stream *stream, struct token **line, struct token *token) 1761 { 1762 sparse_error(token->pos, "%s", show_token_sequence(token->next, 0)); 1763 return 1; 1764 } 1765 1766 static int handle_nostdinc(struct stream *stream, struct token **line, struct token *token) 1767 { 1768 /* 1769 * Do we have any non-system includes? 1770 * Clear them out if so.. 1771 */ 1772 *sys_includepath = NULL; 1773 return 1; 1774 } 1775 1776 static inline void update_inc_ptrs(const char ***where) 1777 { 1778 1779 if (*where <= dirafter_includepath) { 1780 dirafter_includepath++; 1781 /* If this was the entry that we prepend, don't 1782 * rise the lower entries, even if they are at 1783 * the same level. */ 1784 if (where == &dirafter_includepath) 1785 return; 1786 } 1787 if (*where <= sys_includepath) { 1788 sys_includepath++; 1789 if (where == &sys_includepath) 1790 return; 1791 } 1792 if (*where <= isys_includepath) { 1793 isys_includepath++; 1794 if (where == &isys_includepath) 1795 return; 1796 } 1797 1798 /* angle_includepath is actually never updated, since we 1799 * don't suppport -iquote rught now. May change some day. */ 1800 if (*where <= angle_includepath) { 1801 angle_includepath++; 1802 if (where == &angle_includepath) 1803 return; 1804 } 1805 } 1806 1807 /* Add a path before 'where' and update the pointers associated with the 1808 * includepath array */ 1809 static void add_path_entry(struct token *token, const char *path, 1810 const char ***where) 1811 { 1812 const char **dst; 1813 const char *next; 1814 1815 /* Need one free entry.. */ 1816 if (includepath[INCLUDEPATHS-2]) 1817 error_die(token->pos, "too many include path entries"); 1818 1819 /* check that this is not a duplicate */ 1820 dst = includepath; 1821 while (*dst) { 1822 if (strcmp(*dst, path) == 0) 1823 return; 1824 dst++; 1825 } 1826 next = path; 1827 dst = *where; 1828 1829 update_inc_ptrs(where); 1830 1831 /* 1832 * Move them all up starting at dst, 1833 * insert the new entry.. 1834 */ 1835 do { 1836 const char *tmp = *dst; 1837 *dst = next; 1838 next = tmp; 1839 dst++; 1840 } while (next); 1841 } 1842 1843 static int handle_add_include(struct stream *stream, struct token **line, struct token *token) 1844 { 1845 for (;;) { 1846 token = token->next; 1847 if (eof_token(token)) 1848 return 1; 1849 if (token_type(token) != TOKEN_STRING) { 1850 warning(token->pos, "expected path string"); 1851 return 1; 1852 } 1853 add_path_entry(token, token->string->data, &isys_includepath); 1854 } 1855 } 1856 1857 static int handle_add_isystem(struct stream *stream, struct token **line, struct token *token) 1858 { 1859 for (;;) { 1860 token = token->next; 1861 if (eof_token(token)) 1862 return 1; 1863 if (token_type(token) != TOKEN_STRING) { 1864 sparse_error(token->pos, "expected path string"); 1865 return 1; 1866 } 1867 add_path_entry(token, token->string->data, &sys_includepath); 1868 } 1869 } 1870 1871 static int handle_add_system(struct stream *stream, struct token **line, struct token *token) 1872 { 1873 for (;;) { 1874 token = token->next; 1875 if (eof_token(token)) 1876 return 1; 1877 if (token_type(token) != TOKEN_STRING) { 1878 sparse_error(token->pos, "expected path string"); 1879 return 1; 1880 } 1881 add_path_entry(token, token->string->data, &dirafter_includepath); 1882 } 1883 } 1884 1885 /* Add to end on includepath list - no pointer updates */ 1886 static void add_dirafter_entry(struct token *token, const char *path) 1887 { 1888 const char **dst = includepath; 1889 1890 /* Need one free entry.. */ 1891 if (includepath[INCLUDEPATHS-2]) 1892 error_die(token->pos, "too many include path entries"); 1893 1894 /* Add to the end */ 1895 while (*dst) 1896 dst++; 1897 *dst = path; 1898 dst++; 1899 *dst = NULL; 1900 } 1901 1902 static int handle_add_dirafter(struct stream *stream, struct token **line, struct token *token) 1903 { 1904 for (;;) { 1905 token = token->next; 1906 if (eof_token(token)) 1907 return 1; 1908 if (token_type(token) != TOKEN_STRING) { 1909 sparse_error(token->pos, "expected path string"); 1910 return 1; 1911 } 1912 add_dirafter_entry(token, token->string->data); 1913 } 1914 } 1915 1916 static int handle_split_include(struct stream *stream, struct token **line, struct token *token) 1917 { 1918 /* 1919 * -I- 1920 * From info gcc: 1921 * Split the include path. Any directories specified with `-I' 1922 * options before `-I-' are searched only for headers requested with 1923 * `#include "FILE"'; they are not searched for `#include <FILE>'. 1924 * If additional directories are specified with `-I' options after 1925 * the `-I-', those directories are searched for all `#include' 1926 * directives. 1927 * In addition, `-I-' inhibits the use of the directory of the current 1928 * file directory as the first search directory for `#include "FILE"'. 1929 */ 1930 quote_includepath = includepath+1; 1931 angle_includepath = sys_includepath; 1932 return 1; 1933 } 1934 1935 /* 1936 * We replace "#pragma xxx" with "__pragma__" in the token 1937 * stream. Just as an example. 1938 * 1939 * We'll just #define that away for now, but the theory here 1940 * is that we can use this to insert arbitrary token sequences 1941 * to turn the pragmas into internal front-end sequences for 1942 * when we actually start caring about them. 1943 * 1944 * So eventually this will turn into some kind of extended 1945 * __attribute__() like thing, except called __pragma__(xxx). 1946 */ 1947 static int handle_pragma(struct stream *stream, struct token **line, struct token *token) 1948 { 1949 struct token *next = *line; 1950 1951 if (match_ident(token->next, &once_ident) && eof_token(token->next->next)) { 1952 stream->once = 1; 1953 return 1; 1954 } 1955 token->ident = &pragma_ident; 1956 token->pos.newline = 1; 1957 token->pos.whitespace = 1; 1958 token->pos.pos = 1; 1959 *line = token; 1960 token->next = next; 1961 return 0; 1962 } 1963 1964 /* 1965 * We ignore #line for now. 1966 */ 1967 static int handle_line(struct stream *stream, struct token **line, struct token *token) 1968 { 1969 return 1; 1970 } 1971 1972 /* 1973 * Ignore "#ident". 1974 */ 1975 static int handle_ident(struct stream *stream, struct token **line, struct token *token) 1976 { 1977 return 1; 1978 } 1979 1980 static int handle_nondirective(struct stream *stream, struct token **line, struct token *token) 1981 { 1982 sparse_error(token->pos, "unrecognized preprocessor line '%s'", show_token_sequence(token, 0)); 1983 return 1; 1984 } 1985 1986 1987 static void init_preprocessor(void) 1988 { 1989 int i; 1990 int stream = init_stream("preprocessor", -1, includepath); 1991 static struct { 1992 const char *name; 1993 int (*handler)(struct stream *, struct token **, struct token *); 1994 } normal[] = { 1995 { "define", handle_define }, 1996 { "weak_define", handle_weak_define }, 1997 { "strong_define", handle_strong_define }, 1998 { "undef", handle_undef }, 1999 { "strong_undef", handle_strong_undef }, 2000 { "warning", handle_warning }, 2001 { "error", handle_error }, 2002 { "include", handle_include }, 2003 { "include_next", handle_include_next }, 2004 { "pragma", handle_pragma }, 2005 { "line", handle_line }, 2006 { "ident", handle_ident }, 2007 2008 // our internal preprocessor tokens 2009 { "nostdinc", handle_nostdinc }, 2010 { "add_include", handle_add_include }, 2011 { "add_isystem", handle_add_isystem }, 2012 { "add_system", handle_add_system }, 2013 { "add_dirafter", handle_add_dirafter }, 2014 { "split_include", handle_split_include }, 2015 { "argv_include", handle_argv_include }, 2016 }, special[] = { 2017 { "ifdef", handle_ifdef }, 2018 { "ifndef", handle_ifndef }, 2019 { "else", handle_else }, 2020 { "endif", handle_endif }, 2021 { "if", handle_if }, 2022 { "elif", handle_elif }, 2023 }; 2024 2025 for (i = 0; i < ARRAY_SIZE(normal); i++) { 2026 struct symbol *sym; 2027 sym = create_symbol(stream, normal[i].name, SYM_PREPROCESSOR, NS_PREPROCESSOR); 2028 sym->handler = normal[i].handler; 2029 sym->normal = 1; 2030 } 2031 for (i = 0; i < ARRAY_SIZE(special); i++) { 2032 struct symbol *sym; 2033 sym = create_symbol(stream, special[i].name, SYM_PREPROCESSOR, NS_PREPROCESSOR); 2034 sym->handler = special[i].handler; 2035 sym->normal = 0; 2036 } 2037 2038 counter_macro = 0; 2039 } 2040 2041 static void handle_preprocessor_line(struct stream *stream, struct token **line, struct token *start) 2042 { 2043 int (*handler)(struct stream *, struct token **, struct token *); 2044 struct token *token = start->next; 2045 int is_normal = 1; 2046 2047 if (eof_token(token)) 2048 return; 2049 2050 if (token_type(token) == TOKEN_IDENT) { 2051 struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR); 2052 if (sym) { 2053 handler = sym->handler; 2054 is_normal = sym->normal; 2055 } else { 2056 handler = handle_nondirective; 2057 } 2058 } else if (token_type(token) == TOKEN_NUMBER) { 2059 handler = handle_line; 2060 } else { 2061 handler = handle_nondirective; 2062 } 2063 2064 if (is_normal) { 2065 dirty_stream(stream); 2066 if (false_nesting) 2067 goto out; 2068 } 2069 if (!handler(stream, line, token)) /* all set */ 2070 return; 2071 2072 out: 2073 free_preprocessor_line(token); 2074 } 2075 2076 static void preprocessor_line(struct stream *stream, struct token **line) 2077 { 2078 struct token *start = *line, *next; 2079 struct token **tp = &start->next; 2080 2081 for (;;) { 2082 next = *tp; 2083 if (next->pos.newline) 2084 break; 2085 tp = &next->next; 2086 } 2087 *line = next; 2088 *tp = &eof_token_entry; 2089 handle_preprocessor_line(stream, line, start); 2090 } 2091 2092 static void do_preprocess(struct token **list) 2093 { 2094 struct token *next; 2095 2096 while (!eof_token(next = scan_next(list))) { 2097 struct stream *stream = input_streams + next->pos.stream; 2098 2099 if (next->pos.newline && match_op(next, '#')) { 2100 if (!next->pos.noexpand) { 2101 preprocessor_line(stream, list); 2102 __free_token(next); /* Free the '#' token */ 2103 continue; 2104 } 2105 } 2106 2107 switch (token_type(next)) { 2108 case TOKEN_STREAMEND: 2109 if (stream->top_if) { 2110 nesting_error(stream); 2111 sparse_error(stream->top_if->pos, "unterminated preprocessor conditional"); 2112 stream->top_if = NULL; 2113 false_nesting = 0; 2114 } 2115 if (!stream->dirty) 2116 stream->constant = CONSTANT_FILE_YES; 2117 *list = next->next; 2118 continue; 2119 case TOKEN_STREAMBEGIN: 2120 *list = next->next; 2121 continue; 2122 2123 default: 2124 dirty_stream(stream); 2125 if (false_nesting) { 2126 *list = next->next; 2127 __free_token(next); 2128 continue; 2129 } 2130 2131 if (token_type(next) != TOKEN_IDENT || 2132 expand_one_symbol(list)) 2133 list = &next->next; 2134 } 2135 } 2136 } 2137 2138 void init_include_path(void) 2139 { 2140 FILE *fp; 2141 char path[256]; 2142 char arch[32]; 2143 char os[32]; 2144 2145 fp = popen("/bin/uname -m", "r"); 2146 if (!fp) 2147 return; 2148 if (!fgets(arch, sizeof(arch) - 1, fp)) 2149 return; 2150 pclose(fp); 2151 if (arch[strlen(arch) - 1] == '\n') 2152 arch[strlen(arch) - 1] = '\0'; 2153 2154 fp = popen("/bin/uname -o", "r"); 2155 if (!fp) 2156 return; 2157 fgets(os, sizeof(os) - 1, fp); 2158 pclose(fp); 2159 2160 if (strcmp(os, "GNU/Linux\n") != 0) 2161 return; 2162 strcpy(os, "linux-gnu"); 2163 2164 snprintf(path, sizeof(path), "/usr/include/%s-%s/", arch, os); 2165 add_pre_buffer("#add_system \"%s/\"\n", path); 2166 } 2167 2168 struct token * preprocess(struct token *token) 2169 { 2170 preprocessing = 1; 2171 init_preprocessor(); 2172 do_preprocess(&token); 2173 2174 // Drop all expressions from preprocessing, they're not used any more. 2175 // This is not true when we have multiple files, though ;/ 2176 // clear_expression_alloc(); 2177 preprocessing = 0; 2178 2179 return token; 2180 } 2181 2182 static void dump_macro(struct symbol *sym) 2183 { 2184 int nargs = sym->arglist ? sym->arglist->count.normal : 0; 2185 struct token *args[nargs]; 2186 struct token *token; 2187 2188 printf("#define %s", show_ident(sym->ident)); 2189 token = sym->arglist; 2190 if (token) { 2191 const char *sep = ""; 2192 int narg = 0; 2193 putchar('('); 2194 for (; !eof_token(token); token = token->next) { 2195 if (token_type(token) == TOKEN_ARG_COUNT) 2196 continue; 2197 printf("%s%s", sep, show_token(token)); 2198 args[narg++] = token; 2199 sep = ", "; 2200 } 2201 putchar(')'); 2202 } 2203 putchar(' '); 2204 2205 token = sym->expansion; 2206 while (!eof_token(token)) { 2207 struct token *next = token->next; 2208 switch (token_type(token)) { 2209 case TOKEN_UNTAINT: 2210 break; 2211 case TOKEN_MACRO_ARGUMENT: 2212 token = args[token->argnum]; 2213 /* fall-through */ 2214 default: 2215 printf("%s", show_token(token)); 2216 if (next->pos.whitespace) 2217 putchar(' '); 2218 } 2219 token = next; 2220 } 2221 putchar('\n'); 2222 } 2223 2224 void dump_macro_definitions(void) 2225 { 2226 struct ident *name; 2227 2228 FOR_EACH_PTR(macros, name) { 2229 struct symbol *sym = lookup_macro(name); 2230 if (sym) 2231 dump_macro(sym); 2232 } END_FOR_EACH_PTR(name); 2233 } 2234