/*
 * Copyright (c) 2002 - 2009 Tony Finch <dot@dotat.at>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This code was derived from software contributed to Berkeley by Dave Yost.
 * It was rewritten to support ANSI C by Tony Finch. The original version
 * of unifdef carried the 4-clause BSD copyright licence. None of its code
 * remains in this version (though some of the names remain) so it now
 * carries a more liberal licence.
 *
 * The latest version is available from http://dotat.at/prog/unifdef
 */

static const char * const copyright[] = {
    "@(#) Copyright (c) 2002 - 2009 Tony Finch <dot@dotat.at>\n",
    "$dotat: unifdef/unifdef.c,v 1.190 2009/11/27 17:21:26 fanf2 Exp $",
};

/*
 * unifdef - remove ifdef'ed lines
 *
 *  Wishlist:
 *      provide an option which will append the name of the
 *        appropriate symbol after #else's and #endif's
 *      provide an option which will check symbols after
 *        #else's and #endif's to see that they match their
 *        corresponding #ifdef or #ifndef
 *
 *   The first two items above require better buffer handling, which would
 *     also make it possible to handle all "dodgy" directives correctly.
 */

#include <ctype.h>
#include <err.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * types of input lines:
 *
 * The order matters: the values up to LT_ENDIF are repeated, offset by
 * LT_DODGY, for directives that are not on one line, and the transition
 * table is indexed by these values.
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_ERROR,		/* unevaluable #if */
	LT_COUNT
} Linetype;

/* debug names, one per Linetype value below LT_COUNT, in enum order */
static char const * const linetype_name[] = {
	"TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
	"ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
	"DODGY TRUEI", "DODGY FALSEI",
	"DODGY IF", "DODGY TRUE", "DODGY FALSE",
	"DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
	"DODGY ELSE", "DODGY ENDIF",
	"PLAIN", "EOF", "ERROR"
};

/* state of #if processing */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* debug names, one per Ifstate value below IS_COUNT, in enum order */
static char const * const ifstate_name[] = {
	"OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
	"PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
	"PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
	"FALSE_TRAILER"
};

/* state of comment parser */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* debug names, one per Comment_state value, in enum order */
static char const * const comment_name[] = {
	"NO", "C", "CXX", "STARTING", "FINISHING", "CHAR", "STRING"
};

/* state of preprocessor line parser */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* debug names, one per Line_state value, in enum order */
static char const * const linestate_name[] = {
	"START", "HASH", "DIRTY"
};

/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 */
#define	MAXDEPTH        64			/* maximum #if nesting */
#define	MAXLINE         4096			/* maximum length of line */
#define	MAXSYMS         4096			/* maximum number of symbols */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 */
#define	EDITSLOP        10

/*
 * Globals.
 */

static bool             compblank;		/* -B: compress blank lines */
static bool             lnblank;		/* -b: blank deleted lines */
static bool             complement;		/* -c: do the complement */
static bool             debugging;		/* -d: debugging reports */
static bool             iocccok;		/* -e: fewer IOCCC errors */
static bool             strictlogic;		/* -K: keep ambiguous #ifs */
static bool             killconsts;		/* -k: eval constant #ifs */
static bool             lnnum;			/* -n: add #line directives */
static bool             symlist;		/* -s: output symbol list */
static bool             text;			/* -t: this is a text file */

/* parallel arrays: entry i describes the i'th -D/-U symbol */
static const char      *symname[MAXSYMS];	/* symbol name */
static const char      *value[MAXSYMS];		/* -Dsym=value */
static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
static int              nsyms;			/* number of symbols */

static FILE            *input;			/* input file pointer */
static const char      *filename;		/* input file name */
static int              linenum;		/* current line number */

static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
static char            *keyword;		/* used for editing #elif's */

static Comment_state    incomment;		/* comment parser state */
static Line_state       linestate;		/* #if line parser state */
static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
static bool             ignoring[MAXDEPTH];	/* ignore comments state */
static int              stifline[MAXDEPTH];	/* start of current #if */
static int              depth;			/* current #if nesting */
static int              delcount;		/* count of deleted lines */
static unsigned         blankcount;		/* count of blank lines */
static unsigned         blankmax;		/* maximum recent blankcount */
/* NOTE(review): `constexpr' becomes a keyword in C23; build as C99/C11 */
static bool             constexpr;		/* constant #if expression */

static int              exitstat;		/* program exit status */

static void             addsym(bool, bool, char *);
static void             debug(const char *, ...);
static void             done(void);
static void             error(const char *);
static int              findsym(const char *);
200 static void flushline(bool); 201 static Linetype parseline(void); 202 static Linetype ifeval(const char **); 203 static void ignoreoff(void); 204 static void ignoreon(void); 205 static void keywordedit(const char *); 206 static void nest(void); 207 static void process(void); 208 static const char *skipargs(const char *); 209 static const char *skipcomment(const char *); 210 static const char *skipsym(const char *); 211 static void state(Ifstate); 212 static int strlcmp(const char *, const char *, size_t); 213 static void unnest(void); 214 static void usage(void); 215 216 #define endsym(c) (!isalnum((unsigned char)c) && c != '_') 217 218 /* 219 * The main program. 220 */ 221 int 222 main(int argc, char *argv[]) 223 { 224 int opt; 225 226 while ((opt = getopt(argc, argv, "i:D:U:I:BbcdeKklnst")) != -1) 227 switch (opt) { 228 case 'i': /* treat stuff controlled by these symbols as text */ 229 /* 230 * For strict backwards-compatibility the U or D 231 * should be immediately after the -i but it doesn't 232 * matter much if we relax that requirement. 
233 */ 234 opt = *optarg++; 235 if (opt == 'D') 236 addsym(true, true, optarg); 237 else if (opt == 'U') 238 addsym(true, false, optarg); 239 else 240 usage(); 241 break; 242 case 'D': /* define a symbol */ 243 addsym(false, true, optarg); 244 break; 245 case 'U': /* undef a symbol */ 246 addsym(false, false, optarg); 247 break; 248 case 'I': 249 /* no-op for compatibility with cpp */ 250 break; 251 case 'B': /* compress blank lines around removed section */ 252 compblank = true; 253 break; 254 case 'b': /* blank deleted lines instead of omitting them */ 255 case 'l': /* backwards compatibility */ 256 lnblank = true; 257 break; 258 case 'c': /* treat -D as -U and vice versa */ 259 complement = true; 260 break; 261 case 'd': 262 debugging = true; 263 break; 264 case 'e': /* fewer errors from dodgy lines */ 265 iocccok = true; 266 break; 267 case 'K': /* keep ambiguous #ifs */ 268 strictlogic = true; 269 break; 270 case 'k': /* process constant #ifs */ 271 killconsts = true; 272 break; 273 case 'n': /* add #line directive after deleted lines */ 274 lnnum = true; 275 break; 276 case 's': /* only output list of symbols that control #ifs */ 277 symlist = true; 278 break; 279 case 't': /* don't parse C comments */ 280 text = true; 281 break; 282 default: 283 usage(); 284 } 285 argc -= optind; 286 argv += optind; 287 if (compblank && lnblank) 288 errx(2, "-B and -b are mutually exclusive"); 289 if (argc > 1) { 290 errx(2, "can only do one file"); 291 } else if (argc == 1 && strcmp(*argv, "-") != 0) { 292 filename = *argv; 293 input = fopen(filename, "r"); 294 if (input == NULL) 295 err(2, "can't open %s", filename); 296 } else { 297 filename = "[stdin]"; 298 input = stdin; 299 } 300 process(); 301 abort(); /* bug */ 302 } 303 304 static void 305 usage(void) 306 { 307 fprintf(stderr, "usage: unifdef [-BbcdeKknst] [-Ipath]" 308 " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... 
[file]\n"); 309 exit(2); 310 } 311 312 /* 313 * A state transition function alters the global #if processing state 314 * in a particular way. The table below is indexed by the current 315 * processing state and the type of the current line. 316 * 317 * Nesting is handled by keeping a stack of states; some transition 318 * functions increase or decrease the depth. They also maintain the 319 * ignore state on a stack. In some complicated cases they have to 320 * alter the preprocessor directive, as follows. 321 * 322 * When we have processed a group that starts off with a known-false 323 * #if/#elif sequence (which has therefore been deleted) followed by a 324 * #elif that we don't understand and therefore must keep, we edit the 325 * latter into a #if to keep the nesting correct. 326 * 327 * When we find a true #elif in a group, the following block will 328 * always be kept and the rest of the sequence after the next #elif or 329 * #else will be discarded. We edit the #elif into a #else and the 330 * following directive to #endif since this has the desired behaviour. 331 * 332 * "Dodgy" directives are split across multiple lines, the most common 333 * example being a multi-line comment hanging off the right of the 334 * directive. We can handle them correctly only if there is no change 335 * from printing to dropping (or vice versa) caused by that directive. 336 * If the directive is the first of a group we have a choice between 337 * failing with an error, or passing it through unchanged instead of 338 * evaluating it. The latter is not the default to avoid questions from 339 * users about unifdef unexpectedly leaving behind preprocessor directives. 
340 */ 341 typedef void state_fn(void); 342 343 /* report an error */ 344 static void Eelif (void) { error("Inappropriate #elif"); } 345 static void Eelse (void) { error("Inappropriate #else"); } 346 static void Eendif(void) { error("Inappropriate #endif"); } 347 static void Eeof (void) { error("Premature EOF"); } 348 static void Eioccc(void) { error("Obfuscated preprocessor control line"); } 349 /* plain line handling */ 350 static void print (void) { flushline(true); } 351 static void drop (void) { flushline(false); } 352 /* output lacks group's start line */ 353 static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); } 354 static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); } 355 static void Selse (void) { drop(); state(IS_TRUE_ELSE); } 356 /* print/pass this block */ 357 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); } 358 static void Pelse (void) { print(); state(IS_PASS_ELSE); } 359 static void Pendif(void) { print(); unnest(); } 360 /* discard this block */ 361 static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); } 362 static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); } 363 static void Delse (void) { drop(); state(IS_FALSE_ELSE); } 364 static void Dendif(void) { drop(); unnest(); } 365 /* first line of group */ 366 static void Fdrop (void) { nest(); Dfalse(); } 367 static void Fpass (void) { nest(); Pelif(); } 368 static void Ftrue (void) { nest(); Strue(); } 369 static void Ffalse(void) { nest(); Sfalse(); } 370 /* variable pedantry for obfuscated lines */ 371 static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); } 372 static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); } 373 static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); } 374 /* ignore comments in this block */ 375 static void Idrop (void) { Fdrop(); ignoreon(); } 376 static void Itrue (void) { Ftrue(); ignoreon(); } 377 static void Ifalse(void) { Ffalse(); ignoreon(); 
} 378 /* edit this line */ 379 static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } 380 static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); } 381 static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); } 382 static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); } 383 384 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { 385 /* IS_OUTSIDE */ 386 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, 387 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, 388 print, done, abort }, 389 /* IS_FALSE_PREFIX */ 390 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, 391 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, 392 drop, Eeof, abort }, 393 /* IS_TRUE_PREFIX */ 394 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, 395 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 396 print, Eeof, abort }, 397 /* IS_PASS_MIDDLE */ 398 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, 399 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, 400 print, Eeof, abort }, 401 /* IS_FALSE_MIDDLE */ 402 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, 403 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 404 drop, Eeof, abort }, 405 /* IS_TRUE_MIDDLE */ 406 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, 407 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, 408 print, Eeof, abort }, 409 /* IS_PASS_ELSE */ 410 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, 411 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, 412 print, Eeof, abort }, 413 /* IS_FALSE_ELSE */ 414 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, 415 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, 416 drop, Eeof, abort 
}, 417 /* IS_TRUE_ELSE */ 418 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, 419 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, 420 print, Eeof, abort }, 421 /* IS_FALSE_TRAILER */ 422 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, 423 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, 424 drop, Eeof, abort } 425 /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF 426 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) 427 PLAIN EOF ERROR */ 428 }; 429 430 /* 431 * State machine utility functions 432 */ 433 static void 434 done(void) 435 { 436 if (incomment) 437 error("EOF in comment"); 438 exit(exitstat); 439 } 440 static void 441 ignoreoff(void) 442 { 443 if (depth == 0) 444 abort(); /* bug */ 445 ignoring[depth] = ignoring[depth-1]; 446 } 447 static void 448 ignoreon(void) 449 { 450 ignoring[depth] = true; 451 } 452 static void 453 keywordedit(const char *replacement) 454 { 455 size_t size = tline + sizeof(tline) - keyword; 456 char *dst = keyword; 457 const char *src = replacement; 458 if (size != 0) { 459 while ((--size != 0) && (*src != '\0')) 460 *dst++ = *src++; 461 *dst = '\0'; 462 } 463 print(); 464 } 465 static void 466 nest(void) 467 { 468 if (depth > MAXDEPTH-1) 469 abort(); /* bug */ 470 if (depth == MAXDEPTH-1) 471 error("Too many levels of nesting"); 472 depth += 1; 473 stifline[depth] = linenum; 474 } 475 static void 476 unnest(void) 477 { 478 if (depth == 0) 479 abort(); /* bug */ 480 depth -= 1; 481 } 482 static void 483 state(Ifstate is) 484 { 485 ifstate[depth] = is; 486 } 487 488 /* 489 * Write a line to the output or not, according to command line options. 
490 */ 491 static void 492 flushline(bool keep) 493 { 494 if (symlist) 495 return; 496 if (keep ^ complement) { 497 bool blankline = tline[strspn(tline, " \t\n")] == '\0'; 498 if (blankline && compblank && blankcount != blankmax) { 499 delcount += 1; 500 blankcount += 1; 501 } else { 502 if (lnnum && delcount > 0) 503 printf("#line %d\n", linenum); 504 fputs(tline, stdout); 505 delcount = 0; 506 blankmax = blankcount = blankline ? blankcount + 1 : 0; 507 } 508 } else { 509 if (lnblank) 510 putc('\n', stdout); 511 exitstat = 1; 512 delcount += 1; 513 blankcount = 0; 514 } 515 } 516 517 /* 518 * The driver for the state machine. 519 */ 520 static void 521 process(void) 522 { 523 Linetype lineval; 524 525 /* When compressing blank lines, act as if the file 526 is preceded by a large number of blank lines. */ 527 blankmax = blankcount = 1000; 528 for (;;) { 529 linenum++; 530 lineval = parseline(); 531 trans_table[ifstate[depth]][lineval](); 532 debug("process %s -> %s depth %d", 533 linetype_name[lineval], 534 ifstate_name[ifstate[depth]], depth); 535 } 536 } 537 538 /* 539 * Parse a line and determine its type. We keep the preprocessor line 540 * parser state between calls in the global variable linestate, with 541 * help from skipcomment(). 
542 */ 543 static Linetype 544 parseline(void) 545 { 546 const char *cp; 547 int cursym; 548 int kwlen; 549 Linetype retval; 550 Comment_state wascomment; 551 552 if (fgets(tline, MAXLINE, input) == NULL) 553 return (LT_EOF); 554 retval = LT_PLAIN; 555 wascomment = incomment; 556 cp = skipcomment(tline); 557 if (linestate == LS_START) { 558 if (*cp == '#') { 559 linestate = LS_HASH; 560 cp = skipcomment(cp + 1); 561 } else if (*cp != '\0') 562 linestate = LS_DIRTY; 563 } 564 if (!incomment && linestate == LS_HASH) { 565 keyword = tline + (cp - tline); 566 cp = skipsym(cp); 567 kwlen = cp - keyword; 568 /* no way can we deal with a continuation inside a keyword */ 569 if (strncmp(cp, "\\\n", 2) == 0) 570 Eioccc(); 571 if (strlcmp("ifdef", keyword, kwlen) == 0 || 572 strlcmp("ifndef", keyword, kwlen) == 0) { 573 cp = skipcomment(cp); 574 if ((cursym = findsym(cp)) < 0) 575 retval = LT_IF; 576 else { 577 retval = (keyword[2] == 'n') 578 ? LT_FALSE : LT_TRUE; 579 if (value[cursym] == NULL) 580 retval = (retval == LT_TRUE) 581 ? LT_FALSE : LT_TRUE; 582 if (ignore[cursym]) 583 retval = (retval == LT_TRUE) 584 ? 
LT_TRUEI : LT_FALSEI; 585 } 586 cp = skipsym(cp); 587 } else if (strlcmp("if", keyword, kwlen) == 0) 588 retval = ifeval(&cp); 589 else if (strlcmp("elif", keyword, kwlen) == 0) 590 retval = ifeval(&cp) - LT_IF + LT_ELIF; 591 else if (strlcmp("else", keyword, kwlen) == 0) 592 retval = LT_ELSE; 593 else if (strlcmp("endif", keyword, kwlen) == 0) 594 retval = LT_ENDIF; 595 else { 596 linestate = LS_DIRTY; 597 retval = LT_PLAIN; 598 } 599 cp = skipcomment(cp); 600 if (*cp != '\0') { 601 linestate = LS_DIRTY; 602 if (retval == LT_TRUE || retval == LT_FALSE || 603 retval == LT_TRUEI || retval == LT_FALSEI) 604 retval = LT_IF; 605 if (retval == LT_ELTRUE || retval == LT_ELFALSE) 606 retval = LT_ELIF; 607 } 608 if (retval != LT_PLAIN && (wascomment || incomment)) { 609 retval += LT_DODGY; 610 if (incomment) 611 linestate = LS_DIRTY; 612 } 613 /* skipcomment normally changes the state, except 614 if the last line of the file lacks a newline, or 615 if there is too much whitespace in a directive */ 616 if (linestate == LS_HASH) { 617 size_t len = cp - tline; 618 if (fgets(tline + len, MAXLINE - len, input) == NULL) { 619 /* append the missing newline */ 620 tline[len+0] = '\n'; 621 tline[len+1] = '\0'; 622 cp++; 623 linestate = LS_START; 624 } else { 625 linestate = LS_DIRTY; 626 } 627 } 628 } 629 if (linestate == LS_DIRTY) { 630 while (*cp != '\0') 631 cp = skipcomment(cp + 1); 632 } 633 debug("parser %s comment %s line", 634 comment_name[incomment], linestate_name[linestate]); 635 return (retval); 636 } 637 638 /* 639 * These are the binary operators that are supported by the expression 640 * evaluator. 641 */ 642 static Linetype op_strict(int *p, int v, Linetype at, Linetype bt) { 643 if(at == LT_IF || bt == LT_IF) return (LT_IF); 644 return (*p = v, v ? 
LT_TRUE : LT_FALSE); 645 } 646 static Linetype op_lt(int *p, Linetype at, int a, Linetype bt, int b) { 647 return op_strict(p, a < b, at, bt); 648 } 649 static Linetype op_gt(int *p, Linetype at, int a, Linetype bt, int b) { 650 return op_strict(p, a > b, at, bt); 651 } 652 static Linetype op_le(int *p, Linetype at, int a, Linetype bt, int b) { 653 return op_strict(p, a <= b, at, bt); 654 } 655 static Linetype op_ge(int *p, Linetype at, int a, Linetype bt, int b) { 656 return op_strict(p, a >= b, at, bt); 657 } 658 static Linetype op_eq(int *p, Linetype at, int a, Linetype bt, int b) { 659 return op_strict(p, a == b, at, bt); 660 } 661 static Linetype op_ne(int *p, Linetype at, int a, Linetype bt, int b) { 662 return op_strict(p, a != b, at, bt); 663 } 664 static Linetype op_or(int *p, Linetype at, int a, Linetype bt, int b) { 665 if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE)) 666 return (*p = 1, LT_TRUE); 667 return op_strict(p, a || b, at, bt); 668 } 669 static Linetype op_and(int *p, Linetype at, int a, Linetype bt, int b) { 670 if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE)) 671 return (*p = 0, LT_FALSE); 672 return op_strict(p, a && b, at, bt); 673 } 674 675 /* 676 * An evaluation function takes three arguments, as follows: (1) a pointer to 677 * an element of the precedence table which lists the operators at the current 678 * level of precedence; (2) a pointer to an integer which will receive the 679 * value of the expression; and (3) a pointer to a char* that points to the 680 * expression to be evaluated and that is updated to the end of the expression 681 * when evaluation is complete. The function returns LT_FALSE if the value of 682 * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression 683 * depends on an unknown symbol, or LT_ERROR if there is a parse failure. 
684 */ 685 struct ops; 686 687 typedef Linetype eval_fn(const struct ops *, int *, const char **); 688 689 static eval_fn eval_table, eval_unary; 690 691 /* 692 * The precedence table. Expressions involving binary operators are evaluated 693 * in a table-driven way by eval_table. When it evaluates a subexpression it 694 * calls the inner function with its first argument pointing to the next 695 * element of the table. Innermost expressions have special non-table-driven 696 * handling. 697 */ 698 static const struct ops { 699 eval_fn *inner; 700 struct op { 701 const char *str; 702 Linetype (*fn)(int *, Linetype, int, Linetype, int); 703 } op[5]; 704 } eval_ops[] = { 705 { eval_table, { { "||", op_or } } }, 706 { eval_table, { { "&&", op_and } } }, 707 { eval_table, { { "==", op_eq }, 708 { "!=", op_ne } } }, 709 { eval_unary, { { "<=", op_le }, 710 { ">=", op_ge }, 711 { "<", op_lt }, 712 { ">", op_gt } } } 713 }; 714 715 /* 716 * Function for evaluating the innermost parts of expressions, 717 * viz. !expr (expr) number defined(symbol) symbol 718 * We reset the constexpr flag in the last two cases. 719 */ 720 static Linetype 721 eval_unary(const struct ops *ops, int *valp, const char **cpp) 722 { 723 const char *cp; 724 char *ep; 725 int sym; 726 bool defparen; 727 Linetype lt; 728 729 cp = skipcomment(*cpp); 730 if (*cp == '!') { 731 debug("eval%d !", ops - eval_ops); 732 cp++; 733 lt = eval_unary(ops, valp, &cp); 734 if (lt == LT_ERROR) 735 return (LT_ERROR); 736 if (lt != LT_IF) { 737 *valp = !*valp; 738 lt = *valp ? LT_TRUE : LT_FALSE; 739 } 740 } else if (*cp == '(') { 741 cp++; 742 debug("eval%d (", ops - eval_ops); 743 lt = eval_table(eval_ops, valp, &cp); 744 if (lt == LT_ERROR) 745 return (LT_ERROR); 746 cp = skipcomment(cp); 747 if (*cp++ != ')') 748 return (LT_ERROR); 749 } else if (isdigit((unsigned char)*cp)) { 750 debug("eval%d number", ops - eval_ops); 751 *valp = strtol(cp, &ep, 0); 752 if (ep == cp) 753 return (LT_ERROR); 754 lt = *valp ? 
LT_TRUE : LT_FALSE; 755 cp = skipsym(cp); 756 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { 757 cp = skipcomment(cp+7); 758 debug("eval%d defined", ops - eval_ops); 759 if (*cp == '(') { 760 cp = skipcomment(cp+1); 761 defparen = true; 762 } else { 763 defparen = false; 764 } 765 sym = findsym(cp); 766 if (sym < 0) { 767 lt = LT_IF; 768 } else { 769 *valp = (value[sym] != NULL); 770 lt = *valp ? LT_TRUE : LT_FALSE; 771 } 772 cp = skipsym(cp); 773 cp = skipcomment(cp); 774 if (defparen && *cp++ != ')') 775 return (LT_ERROR); 776 constexpr = false; 777 } else if (!endsym(*cp)) { 778 debug("eval%d symbol", ops - eval_ops); 779 sym = findsym(cp); 780 cp = skipsym(cp); 781 if (sym < 0) { 782 lt = LT_IF; 783 cp = skipargs(cp); 784 } else if (value[sym] == NULL) { 785 *valp = 0; 786 lt = LT_FALSE; 787 } else { 788 *valp = strtol(value[sym], &ep, 0); 789 if (*ep != '\0' || ep == value[sym]) 790 return (LT_ERROR); 791 lt = *valp ? LT_TRUE : LT_FALSE; 792 cp = skipargs(cp); 793 } 794 constexpr = false; 795 } else { 796 debug("eval%d bad expr", ops - eval_ops); 797 return (LT_ERROR); 798 } 799 800 *cpp = cp; 801 debug("eval%d = %d", ops - eval_ops, *valp); 802 return (lt); 803 } 804 805 /* 806 * Table-driven evaluation of binary operators. 
807 */ 808 static Linetype 809 eval_table(const struct ops *ops, int *valp, const char **cpp) 810 { 811 const struct op *op; 812 const char *cp; 813 int val; 814 Linetype lt, rt; 815 816 debug("eval%d", ops - eval_ops); 817 cp = *cpp; 818 lt = ops->inner(ops+1, valp, &cp); 819 if (lt == LT_ERROR) 820 return (LT_ERROR); 821 for (;;) { 822 cp = skipcomment(cp); 823 for (op = ops->op; op->str != NULL; op++) 824 if (strncmp(cp, op->str, strlen(op->str)) == 0) 825 break; 826 if (op->str == NULL) 827 break; 828 cp += strlen(op->str); 829 debug("eval%d %s", ops - eval_ops, op->str); 830 rt = ops->inner(ops+1, &val, &cp); 831 if (rt == LT_ERROR) 832 return (LT_ERROR); 833 lt = op->fn(valp, lt, *valp, rt, val); 834 } 835 836 *cpp = cp; 837 debug("eval%d = %d", ops - eval_ops, *valp); 838 debug("eval%d lt = %s", ops - eval_ops, linetype_name[lt]); 839 return (lt); 840 } 841 842 /* 843 * Evaluate the expression on a #if or #elif line. If we can work out 844 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we 845 * return just a generic LT_IF. 846 */ 847 static Linetype 848 ifeval(const char **cpp) 849 { 850 int ret; 851 int val = 0; 852 853 debug("eval %s", *cpp); 854 constexpr = killconsts ? false : true; 855 ret = eval_table(eval_ops, &val, cpp); 856 debug("eval = %d", val); 857 return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret); 858 } 859 860 /* 861 * Skip over comments, strings, and character literals and stop at the 862 * next character position that is not whitespace. Between calls we keep 863 * the comment state in the global variable incomment, and we also adjust 864 * the global variable linestate when we see a newline. 865 * XXX: doesn't cope with the buffer splitting inside a state transition. 
866 */ 867 static const char * 868 skipcomment(const char *cp) 869 { 870 if (text || ignoring[depth]) { 871 for (; isspace((unsigned char)*cp); cp++) 872 if (*cp == '\n') 873 linestate = LS_START; 874 return (cp); 875 } 876 while (*cp != '\0') 877 /* don't reset to LS_START after a line continuation */ 878 if (strncmp(cp, "\\\n", 2) == 0) 879 cp += 2; 880 else switch (incomment) { 881 case NO_COMMENT: 882 if (strncmp(cp, "/\\\n", 3) == 0) { 883 incomment = STARTING_COMMENT; 884 cp += 3; 885 } else if (strncmp(cp, "/*", 2) == 0) { 886 incomment = C_COMMENT; 887 cp += 2; 888 } else if (strncmp(cp, "//", 2) == 0) { 889 incomment = CXX_COMMENT; 890 cp += 2; 891 } else if (strncmp(cp, "\'", 1) == 0) { 892 incomment = CHAR_LITERAL; 893 linestate = LS_DIRTY; 894 cp += 1; 895 } else if (strncmp(cp, "\"", 1) == 0) { 896 incomment = STRING_LITERAL; 897 linestate = LS_DIRTY; 898 cp += 1; 899 } else if (strncmp(cp, "\n", 1) == 0) { 900 linestate = LS_START; 901 cp += 1; 902 } else if (strchr(" \t", *cp) != NULL) { 903 cp += 1; 904 } else 905 return (cp); 906 continue; 907 case CXX_COMMENT: 908 if (strncmp(cp, "\n", 1) == 0) { 909 incomment = NO_COMMENT; 910 linestate = LS_START; 911 } 912 cp += 1; 913 continue; 914 case CHAR_LITERAL: 915 case STRING_LITERAL: 916 if ((incomment == CHAR_LITERAL && cp[0] == '\'') || 917 (incomment == STRING_LITERAL && cp[0] == '\"')) { 918 incomment = NO_COMMENT; 919 cp += 1; 920 } else if (cp[0] == '\\') { 921 if (cp[1] == '\0') 922 cp += 1; 923 else 924 cp += 2; 925 } else if (strncmp(cp, "\n", 1) == 0) { 926 if (incomment == CHAR_LITERAL) 927 error("unterminated char literal"); 928 else 929 error("unterminated string literal"); 930 } else 931 cp += 1; 932 continue; 933 case C_COMMENT: 934 if (strncmp(cp, "*\\\n", 3) == 0) { 935 incomment = FINISHING_COMMENT; 936 cp += 3; 937 } else if (strncmp(cp, "*/", 2) == 0) { 938 incomment = NO_COMMENT; 939 cp += 2; 940 } else 941 cp += 1; 942 continue; 943 case STARTING_COMMENT: 944 if (*cp == '*') { 945 
incomment = C_COMMENT; 946 cp += 1; 947 } else if (*cp == '/') { 948 incomment = CXX_COMMENT; 949 cp += 1; 950 } else { 951 incomment = NO_COMMENT; 952 linestate = LS_DIRTY; 953 } 954 continue; 955 case FINISHING_COMMENT: 956 if (*cp == '/') { 957 incomment = NO_COMMENT; 958 cp += 1; 959 } else 960 incomment = C_COMMENT; 961 continue; 962 default: 963 abort(); /* bug */ 964 } 965 return (cp); 966 } 967 968 /* 969 * Skip macro arguments. 970 */ 971 static const char * 972 skipargs(const char *cp) 973 { 974 const char *ocp = cp; 975 int level = 0; 976 cp = skipcomment(cp); 977 if (*cp != '(') 978 return (cp); 979 do { 980 if (*cp == '(') 981 level++; 982 if (*cp == ')') 983 level--; 984 cp = skipcomment(cp+1); 985 } while (level != 0 && *cp != '\0'); 986 if (level == 0) 987 return (cp); 988 else 989 /* Rewind and re-detect the syntax error later. */ 990 return (ocp); 991 } 992 993 /* 994 * Skip over an identifier. 995 */ 996 static const char * 997 skipsym(const char *cp) 998 { 999 while (!endsym(*cp)) 1000 ++cp; 1001 return (cp); 1002 } 1003 1004 /* 1005 * Look for the symbol in the symbol table. If it is found, we return 1006 * the symbol table index, else we return -1. 1007 */ 1008 static int 1009 findsym(const char *str) 1010 { 1011 const char *cp; 1012 int symind; 1013 1014 cp = skipsym(str); 1015 if (cp == str) 1016 return (-1); 1017 if (symlist) { 1018 printf("%.*s\n", (int)(cp-str), str); 1019 /* we don't care about the value of the symbol */ 1020 return (0); 1021 } 1022 for (symind = 0; symind < nsyms; ++symind) { 1023 if (strlcmp(symname[symind], str, cp-str) == 0) { 1024 debug("findsym %s %s", symname[symind], 1025 value[symind] ? value[symind] : ""); 1026 return (symind); 1027 } 1028 } 1029 return (-1); 1030 } 1031 1032 /* 1033 * Add a symbol to the symbol table. 
1034 */ 1035 static void 1036 addsym(bool ignorethis, bool definethis, char *sym) 1037 { 1038 int symind; 1039 char *val; 1040 1041 symind = findsym(sym); 1042 if (symind < 0) { 1043 if (nsyms >= MAXSYMS) 1044 errx(2, "too many symbols"); 1045 symind = nsyms++; 1046 } 1047 symname[symind] = sym; 1048 ignore[symind] = ignorethis; 1049 val = sym + (skipsym(sym) - sym); 1050 if (definethis) { 1051 if (*val == '=') { 1052 value[symind] = val+1; 1053 *val = '\0'; 1054 } else if (*val == '\0') 1055 value[symind] = ""; 1056 else 1057 usage(); 1058 } else { 1059 if (*val != '\0') 1060 usage(); 1061 value[symind] = NULL; 1062 } 1063 } 1064 1065 /* 1066 * Compare s with n characters of t. 1067 * The same as strncmp() except that it checks that s[n] == '\0'. 1068 */ 1069 static int 1070 strlcmp(const char *s, const char *t, size_t n) 1071 { 1072 while (n-- && *t != '\0') 1073 if (*s != *t) 1074 return ((unsigned char)*s - (unsigned char)*t); 1075 else 1076 ++s, ++t; 1077 return ((unsigned char)*s); 1078 } 1079 1080 /* 1081 * Diagnostics. 1082 */ 1083 static void 1084 debug(const char *msg, ...) 1085 { 1086 va_list ap; 1087 1088 if (debugging) { 1089 va_start(ap, msg); 1090 vwarnx(msg, ap); 1091 va_end(ap); 1092 } 1093 } 1094 1095 static void 1096 error(const char *msg) 1097 { 1098 if (depth == 0) 1099 warnx("%s: %d: %s", filename, linenum, msg); 1100 else 1101 warnx("%s: %d: %s (#if line %d depth %d)", 1102 filename, linenum, msg, stifline[depth], depth); 1103 errx(2, "output may be truncated"); 1104 } 1105