1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 6 * Copyright (c) 1980, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if 0 39 #endif 40 41 #include <sys/cdefs.h> 42 #include <sys/param.h> 43 #include <sys/capsicum.h> 44 #include <capsicum_helpers.h> 45 #include <err.h> 46 #include <errno.h> 47 #include <fcntl.h> 48 #include <unistd.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <ctype.h> 53 #include "indent_globs.h" 54 #include "indent_codes.h" 55 #include "indent.h" 56 57 /* Globals */ 58 FILE *input, *output; 59 char *labbuf, *s_lab, *e_lab, *l_lab; 60 char *codebuf, *s_code, *e_code, *l_code; 61 char *combuf, *s_com, *e_com, *l_com; 62 char *tokenbuf, *s_token, *e_token, *l_token; 63 char *in_buffer, *in_buffer_limit; 64 char *buf_ptr, *buf_end; 65 66 char sc_buf[sc_size]; 67 68 char *save_com, *sc_end; 69 char *bp_save; 70 char *be_save; 71 72 struct options opt; 73 int line_no; 74 75 struct parser_state ps; 76 int ifdef_level; 77 struct parser_state state_stack[5]; 78 struct parser_state match_state[5]; 79 80 81 static void bakcopy(void); 82 static void indent_declaration(int, int); 83 84 const char *in_name = "Standard Input"; /* will always point to name of input 85 * file */ 86 const char *out_name = "Standard Output"; /* will always point to name 87 * of output file */ 88 const char *simple_backup_suffix = ".BAK"; /* Suffix to use for backup 89 * files */ 90 char bakfile[MAXPATHLEN] = ""; 91 92 int 93 main(int argc, char **argv) 94 { 95 cap_rights_t rights; 96 97 int dec_ind; /* current indentation for declarations */ 98 int di_stack[20]; /* a stack of structure indentation levels */ 99 int force_nl; /* when true, code must be broken */ 100 int hd_type = 0; /* used to store type of stmt for if (...), 101 * for (...), etc */ 102 int i; /* local loop counter */ 103 int scase; /* set to true when we see a case, so we will 104 * know what to do with the following colon */ 105 int sp_sw; /* when true, we are in the expression of 106 * if(...), while(...), etc. */ 107 int squest; /* when this is positive, we have seen a ? 108 * without the matching : in a <c>?<s>:<s> 109 * construct */ 110 const char *t_ptr; /* used for copying tokens */ 111 int tabs_to_var; /* true if using tabs to indent to var name */ 112 int type_code; /* the type of token, returned by lexi */ 113 114 int last_else = 0; /* true iff last keyword was an else */ 115 const char *profile_name = NULL; 116 const char *envval = NULL; 117 struct parser_state transient_state; /* a copy for lookup */ 118 119 /*-----------------------------------------------*\ 120 | INITIALIZATION | 121 \*-----------------------------------------------*/ 122 123 found_err = 0; 124 125 ps.p_stack[0] = stmt; /* this is the parser's stack */ 126 ps.last_nl = true; /* this is true if the last thing scanned was 127 * a newline */ 128 ps.last_token = semicolon; 129 combuf = (char *) malloc(bufsize); 130 if (combuf == NULL) 131 err(1, NULL); 132 labbuf = (char *) malloc(bufsize); 133 if (labbuf == NULL) 134 err(1, NULL); 135 codebuf = (char *) malloc(bufsize); 136 if (codebuf == NULL) 137 err(1, NULL); 138 tokenbuf = (char *) malloc(bufsize); 139 if (tokenbuf == NULL) 140 err(1, NULL); 141 alloc_typenames(); 142 init_constant_tt(); 143 l_com = combuf + bufsize - 5; 144 l_lab = labbuf + bufsize - 5; 145 l_code = codebuf + bufsize - 5; 146 l_token = tokenbuf + bufsize - 5; 147 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 148 * comment buffers */ 149 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 150 opt.else_if = 1; /* Default else-if special processing to on */ 151 s_lab = e_lab = labbuf + 1; 152 s_code = e_code = codebuf + 1; 153 s_com = e_com = combuf + 1; 154 s_token = e_token = tokenbuf + 1; 155 156 in_buffer = (char *) malloc(10); 157 if (in_buffer == NULL) 158 err(1, NULL); 159 in_buffer_limit = in_buffer + 8; 160 buf_ptr = buf_end = in_buffer; 161 line_no = 1; 162 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 163 sp_sw = force_nl = false; 164 ps.in_or_st = false; 165 ps.bl_line = true; 166 dec_ind = 0; 167 di_stack[ps.dec_nest = 0] = 0; 168 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 169 170 scase = ps.pcase = false; 171 squest = 0; 172 sc_end = NULL; 173 bp_save = NULL; 174 be_save = NULL; 175 176 output = NULL; 177 tabs_to_var = 0; 178 179 envval = getenv("SIMPLE_BACKUP_SUFFIX"); 180 if (envval) 181 simple_backup_suffix = envval; 182 183 /*--------------------------------------------------*\ 184 | COMMAND LINE SCAN | 185 \*--------------------------------------------------*/ 186 187 #ifdef undef 188 max_col = 78; /* -l78 */ 189 lineup_to_parens = 1; /* -lp */ 190 lineup_to_parens_always = 0; /* -nlpl */ 191 ps.ljust_decl = 0; /* -ndj */ 192 ps.com_ind = 33; /* -c33 */ 193 star_comment_cont = 1; /* -sc */ 194 ps.ind_size = 8; /* -i8 */ 195 verbose = 0; 196 ps.decl_indent = 16; /* -di16 */ 197 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value 198 * by an arg, we will set this equal to 199 * ps.decl_ind */ 200 ps.indent_parameters = 1; /* -ip */ 201 ps.decl_com_ind = 0; /* if this is not set to some positive value 202 * by an arg, we will set this equal to 203 * ps.com_ind */ 204 btype_2 = 1; /* -br */ 205 cuddle_else = 1; /* -ce */ 206 ps.unindent_displace = 0; /* -d0 */ 207 ps.case_indent = 0; /* -cli0 */ 208 format_block_comments = 1; /* -fcb */ 209 format_col1_comments = 1; /* -fc1 */ 210 procnames_start_line = 1; /* -psl */ 211 proc_calls_space = 0; /* -npcs */ 212 comment_delimiter_on_blankline = 1; /* -cdb */ 213 ps.leave_comma = 1; /* -nbc */ 214 #endif 215 216 for (i = 1; i < argc; ++i) 217 if (strcmp(argv[i], "-npro") == 0) 218 break; 219 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') 220 profile_name = argv[i]; /* non-empty -P (set profile) */ 221 set_defaults(); 222 if (i >= argc) 223 set_profile(profile_name); 224 225 for (i = 1; i < argc; ++i) { 226 227 /* 228 * look thru args (if any) for changes to defaults 229 */ 230 if (argv[i][0] != '-') {/* no flag on parameter */ 231 if (input == NULL) { /* we must have the input file */ 232 in_name = argv[i]; /* remember name of input file */ 233 input = fopen(in_name, "r"); 234 if (input == NULL) /* check for open error */ 235 err(1, "%s", in_name); 236 continue; 237 } 238 else if (output == NULL) { /* we have the output file */ 239 out_name = argv[i]; /* remember name of output file */ 240 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 241 * the file */ 242 errx(1, "input and output files must be different"); 243 } 244 output = fopen(out_name, "w"); 245 if (output == NULL) /* check for create error */ 246 err(1, "%s", out_name); 247 continue; 248 } 249 errx(1, "unknown parameter: %s", argv[i]); 250 } 251 else 252 set_option(argv[i]); 253 } /* end of for */ 254 if (input == NULL) 255 input = stdin; 256 if (output == NULL) { 257 if (input == stdin) 258 output = stdout; 259 else { 260 out_name = in_name; 261 bakcopy(); 262 } 263 } 264 265 /* Restrict input/output descriptors and enter Capsicum sandbox. */ 266 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); 267 if (caph_rights_limit(fileno(output), &rights) < 0) 268 err(EXIT_FAILURE, "unable to limit rights for %s", out_name); 269 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 270 if (caph_rights_limit(fileno(input), &rights) < 0) 271 err(EXIT_FAILURE, "unable to limit rights for %s", in_name); 272 if (caph_enter() < 0) 273 err(EXIT_FAILURE, "unable to enter capability mode"); 274 275 if (opt.com_ind <= 1) 276 opt.com_ind = 2; /* don't put normal comments before column 2 */ 277 if (opt.block_comment_max_col <= 0) 278 opt.block_comment_max_col = opt.max_col; 279 if (opt.local_decl_indent < 0) /* if not specified by user, set this */ 280 opt.local_decl_indent = opt.decl_indent; 281 if (opt.decl_com_ind <= 0) /* if not specified by user, set this */ 282 opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind; 283 if (opt.continuation_indent == 0) 284 opt.continuation_indent = opt.ind_size; 285 fill_buffer(); /* get first batch of stuff into input buffer */ 286 287 parse(semicolon); 288 { 289 char *p = buf_ptr; 290 int col = 1; 291 292 while (1) { 293 if (*p == ' ') 294 col++; 295 else if (*p == '\t') 296 col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1; 297 else 298 break; 299 p++; 300 } 301 if (col > opt.ind_size) 302 ps.ind_level = ps.i_l_follow = col / opt.ind_size; 303 } 304 305 /* 306 * START OF MAIN LOOP 307 */ 308 309 while (1) { /* this is the main loop. it will go until we 310 * reach eof */ 311 int comment_buffered = false; 312 313 type_code = lexi(&ps); /* lexi reads one token. The actual 314 * characters read are stored in "token". lexi 315 * returns a code indicating the type of token */ 316 317 /* 318 * The following code moves newlines and comments following an if (), 319 * while (), else, etc. up to the start of the following stmt to 320 * a buffer. This allows proper handling of both kinds of brace 321 * placement (-br, -bl) and cuddling "else" (-ce). 322 */ 323 324 while (ps.search_brace) { 325 switch (type_code) { 326 case newline: 327 if (sc_end == NULL) { 328 save_com = sc_buf; 329 save_com[0] = save_com[1] = ' '; 330 sc_end = &save_com[2]; 331 } 332 *sc_end++ = '\n'; 333 /* 334 * We may have inherited a force_nl == true from the previous 335 * token (like a semicolon). But once we know that a newline 336 * has been scanned in this loop, force_nl should be false. 337 * 338 * However, the force_nl == true must be preserved if newline 339 * is never scanned in this loop, so this assignment cannot be 340 * done earlier. 341 */ 342 force_nl = false; 343 case form_feed: 344 break; 345 case comment: 346 if (sc_end == NULL) { 347 /* 348 * Copy everything from the start of the line, because 349 * pr_comment() will use that to calculate original 350 * indentation of a boxed comment. 351 */ 352 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4); 353 save_com = sc_buf + (buf_ptr - in_buffer - 4); 354 save_com[0] = save_com[1] = ' '; 355 sc_end = &save_com[2]; 356 } 357 comment_buffered = true; 358 *sc_end++ = '/'; /* copy in start of comment */ 359 *sc_end++ = '*'; 360 for (;;) { /* loop until we get to the end of the comment */ 361 *sc_end = *buf_ptr++; 362 if (buf_ptr >= buf_end) 363 fill_buffer(); 364 if (*sc_end++ == '*' && *buf_ptr == '/') 365 break; /* we are at end of comment */ 366 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer 367 * overflow */ 368 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); 369 fflush(output); 370 exit(1); 371 } 372 } 373 *sc_end++ = '/'; /* add ending slash */ 374 if (++buf_ptr >= buf_end) /* get past / in buffer */ 375 fill_buffer(); 376 break; 377 case lbrace: 378 /* 379 * Put KNF-style lbraces before the buffered up tokens and 380 * jump out of this loop in order to avoid copying the token 381 * again under the default case of the switch below. 382 */ 383 if (sc_end != NULL && opt.btype_2) { 384 save_com[0] = '{'; 385 /* 386 * Originally the lbrace may have been alone on its own 387 * line, but it will be moved into "the else's line", so 388 * if there was a newline resulting from the "{" before, 389 * it must be scanned now and ignored. 390 */ 391 while (isspace((unsigned char)*buf_ptr)) { 392 if (++buf_ptr >= buf_end) 393 fill_buffer(); 394 if (*buf_ptr == '\n') 395 break; 396 } 397 goto sw_buffer; 398 } 399 /* FALLTHROUGH */ 400 default: /* it is the start of a normal statement */ 401 { 402 int remove_newlines; 403 404 remove_newlines = 405 /* "} else" */ 406 (type_code == sp_nparen && *token == 'e' && 407 e_code != s_code && e_code[-1] == '}') 408 /* "else if" */ 409 || (type_code == sp_paren && *token == 'i' && 410 last_else && opt.else_if); 411 if (remove_newlines) 412 force_nl = false; 413 if (sc_end == NULL) { /* ignore buffering if 414 * comment wasn't saved up */ 415 ps.search_brace = false; 416 goto check_type; 417 } 418 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) { 419 sc_end--; 420 } 421 if (opt.swallow_optional_blanklines || 422 (!comment_buffered && remove_newlines)) { 423 force_nl = !remove_newlines; 424 while (sc_end > save_com && sc_end[-1] == '\n') { 425 sc_end--; 426 } 427 } 428 if (force_nl) { /* if we should insert a nl here, put 429 * it into the buffer */ 430 force_nl = false; 431 --line_no; /* this will be re-increased when the 432 * newline is read from the buffer */ 433 *sc_end++ = '\n'; 434 *sc_end++ = ' '; 435 if (opt.verbose) /* print error msg if the line was 436 * not already broken */ 437 diag2(0, "Line broken"); 438 } 439 for (t_ptr = token; *t_ptr; ++t_ptr) 440 *sc_end++ = *t_ptr; 441 442 sw_buffer: 443 ps.search_brace = false; /* stop looking for start of 444 * stmt */ 445 bp_save = buf_ptr; /* save current input buffer */ 446 be_save = buf_end; 447 buf_ptr = save_com; /* fix so that subsequent calls to 448 * lexi will take tokens out of 449 * save_com */ 450 *sc_end++ = ' ';/* add trailing blank, just in case */ 451 buf_end = sc_end; 452 sc_end = NULL; 453 break; 454 } 455 } /* end of switch */ 456 /* 457 * We must make this check, just in case there was an unexpected 458 * EOF. 459 */ 460 if (type_code != 0) { 461 /* 462 * The only intended purpose of calling lexi() below is to 463 * categorize the next token in order to decide whether to 464 * continue buffering forthcoming tokens. Once the buffering 465 * is over, lexi() will be called again elsewhere on all of 466 * the tokens - this time for normal processing. 467 * 468 * Calling it for this purpose is a bug, because lexi() also 469 * changes the parser state and discards leading whitespace, 470 * which is needed mostly for comment-related considerations. 471 * 472 * Work around the former problem by giving lexi() a copy of 473 * the current parser state and discard it if the call turned 474 * out to be just a look ahead. 475 * 476 * Work around the latter problem by copying all whitespace 477 * characters into the buffer so that the later lexi() call 478 * will read them. 479 */ 480 if (sc_end != NULL) { 481 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 482 *sc_end++ = *buf_ptr++; 483 if (sc_end >= &save_com[sc_size]) { 484 errx(1, "input too long"); 485 } 486 } 487 if (buf_ptr >= buf_end) { 488 fill_buffer(); 489 } 490 } 491 transient_state = ps; 492 type_code = lexi(&transient_state); /* read another token */ 493 if (type_code != newline && type_code != form_feed && 494 type_code != comment && !transient_state.search_brace) { 495 ps = transient_state; 496 } 497 } 498 } /* end of while (search_brace) */ 499 last_else = 0; 500 check_type: 501 if (type_code == 0) { /* we got eof */ 502 if (s_lab != e_lab || s_code != e_code 503 || s_com != e_com) /* must dump end of line */ 504 dump_line(); 505 if (ps.tos > 1) /* check for balanced braces */ 506 diag2(1, "Stuff missing from end of file"); 507 508 if (opt.verbose) { 509 printf("There were %d output lines and %d comments\n", 510 ps.out_lines, ps.out_coms); 511 printf("(Lines with comments)/(Lines with code): %6.3f\n", 512 (1.0 * ps.com_lines) / code_lines); 513 } 514 fflush(output); 515 exit(found_err); 516 } 517 if ( 518 (type_code != comment) && 519 (type_code != newline) && 520 (type_code != preesc) && 521 (type_code != form_feed)) { 522 if (force_nl && 523 (type_code != semicolon) && 524 (type_code != lbrace || !opt.btype_2)) { 525 /* we should force a broken line here */ 526 if (opt.verbose) 527 diag2(0, "Line broken"); 528 dump_line(); 529 ps.want_blank = false; /* dont insert blank at line start */ 530 force_nl = false; 531 } 532 ps.in_stmt = true; /* turn on flag which causes an extra level of 533 * indentation. this is turned off by a ; or 534 * '}' */ 535 if (s_com != e_com) { /* the turkey has embedded a comment 536 * in a line. fix it */ 537 int len = e_com - s_com; 538 539 CHECK_SIZE_CODE(len + 3); 540 *e_code++ = ' '; 541 memcpy(e_code, s_com, len); 542 e_code += len; 543 *e_code++ = ' '; 544 *e_code = '\0'; /* null terminate code sect */ 545 ps.want_blank = false; 546 e_com = s_com; 547 } 548 } 549 else if (type_code != comment) /* preserve force_nl thru a comment */ 550 force_nl = false; /* cancel forced newline after newline, form 551 * feed, etc */ 552 553 554 555 /*-----------------------------------------------------*\ 556 | do switch on type of token scanned | 557 \*-----------------------------------------------------*/ 558 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code 559 * before the next CHECK_SIZE_CODE or 560 * dump_line() is 2. After that there's the 561 * final increment for the null character. */ 562 switch (type_code) { /* now, decide what to do with the token */ 563 564 case form_feed: /* found a form feed in line */ 565 ps.use_ff = true; /* a form feed is treated much like a newline */ 566 dump_line(); 567 ps.want_blank = false; 568 break; 569 570 case newline: 571 if (ps.last_token != comma || ps.p_l_follow > 0 572 || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 573 dump_line(); 574 ps.want_blank = false; 575 } 576 ++line_no; /* keep track of input line number */ 577 break; 578 579 case lparen: /* got a '(' or '[' */ 580 /* count parens to make Healy happy */ 581 if (++ps.p_l_follow == nitems(ps.paren_indents)) { 582 diag3(0, "Reached internal limit of %d unclosed parens", 583 nitems(ps.paren_indents)); 584 ps.p_l_follow--; 585 } 586 if (*token == '[') 587 /* not a function pointer declaration or a function call */; 588 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && 589 ps.procname[0] == '\0' && ps.paren_level == 0) { 590 /* function pointer declarations */ 591 indent_declaration(dec_ind, tabs_to_var); 592 ps.dumped_decl_indent = true; 593 } 594 else if (ps.want_blank && 595 ((ps.last_token != ident && ps.last_token != funcname) || 596 opt.proc_calls_space || 597 /* offsetof (1) is never allowed a space; sizeof (2) gets 598 * one iff -bs; all other keywords (>2) always get a space 599 * before lparen */ 600 ps.keyword + opt.Bill_Shannon > 2)) 601 *e_code++ = ' '; 602 ps.want_blank = false; 603 *e_code++ = token[0]; 604 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1; 605 if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent 606 && ps.paren_indents[0] < 2 * opt.ind_size) 607 ps.paren_indents[0] = 2 * opt.ind_size; 608 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 609 /* 610 * this is a kluge to make sure that declarations will be 611 * aligned right if proc decl has an explicit type on it, i.e. 612 * "int a(x) {..." 613 */ 614 parse(semicolon); /* I said this was a kluge... */ 615 ps.in_or_st = false; /* turn off flag for structure decl or 616 * initialization */ 617 } 618 /* parenthesized type following sizeof or offsetof is not a cast */ 619 if (ps.keyword == 1 || ps.keyword == 2) 620 ps.not_cast_mask |= 1 << ps.p_l_follow; 621 break; 622 623 case rparen: /* got a ')' or ']' */ 624 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { 625 ps.last_u_d = true; 626 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 627 ps.want_blank = opt.space_after_cast; 628 } else 629 ps.want_blank = true; 630 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; 631 if (--ps.p_l_follow < 0) { 632 ps.p_l_follow = 0; 633 diag3(0, "Extra %c", *token); 634 } 635 if (e_code == s_code) /* if the paren starts the line */ 636 ps.paren_level = ps.p_l_follow; /* then indent it */ 637 638 *e_code++ = token[0]; 639 640 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 641 * (...), or some such */ 642 sp_sw = false; 643 force_nl = true;/* must force newline after if */ 644 ps.last_u_d = true; /* inform lexi that a following 645 * operator is unary */ 646 ps.in_stmt = false; /* dont use stmt continuation 647 * indentation */ 648 649 parse(hd_type); /* let parser worry about if, or whatever */ 650 } 651 ps.search_brace = opt.btype_2; /* this should ensure that 652 * constructs such as main(){...} 653 * and int[]{...} have their braces 654 * put in the right place */ 655 break; 656 657 case unary_op: /* this could be any unary operation */ 658 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init && 659 ps.procname[0] == '\0' && ps.paren_level == 0) { 660 /* pointer declarations */ 661 662 /* 663 * if this is a unary op in a declaration, we should indent 664 * this token 665 */ 666 for (i = 0; token[i]; ++i) 667 /* find length of token */; 668 indent_declaration(dec_ind - i, tabs_to_var); 669 ps.dumped_decl_indent = true; 670 } 671 else if (ps.want_blank) 672 *e_code++ = ' '; 673 674 { 675 int len = e_token - s_token; 676 677 CHECK_SIZE_CODE(len); 678 memcpy(e_code, token, len); 679 e_code += len; 680 } 681 ps.want_blank = false; 682 break; 683 684 case binary_op: /* any binary operation */ 685 { 686 int len = e_token - s_token; 687 688 CHECK_SIZE_CODE(len + 1); 689 if (ps.want_blank) 690 *e_code++ = ' '; 691 memcpy(e_code, token, len); 692 e_code += len; 693 } 694 ps.want_blank = true; 695 break; 696 697 case postop: /* got a trailing ++ or -- */ 698 *e_code++ = token[0]; 699 *e_code++ = token[1]; 700 ps.want_blank = true; 701 break; 702 703 case question: /* got a ? */ 704 squest++; /* this will be used when a later colon 705 * appears so we can distinguish the 706 * <c>?<n>:<n> construct */ 707 if (ps.want_blank) 708 *e_code++ = ' '; 709 *e_code++ = '?'; 710 ps.want_blank = true; 711 break; 712 713 case casestmt: /* got word 'case' or 'default' */ 714 scase = true; /* so we can process the later colon properly */ 715 goto copy_id; 716 717 case colon: /* got a ':' */ 718 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 719 --squest; 720 if (ps.want_blank) 721 *e_code++ = ' '; 722 *e_code++ = ':'; 723 ps.want_blank = true; 724 break; 725 } 726 if (ps.in_or_st) { 727 *e_code++ = ':'; 728 ps.want_blank = false; 729 break; 730 } 731 ps.in_stmt = false; /* seeing a label does not imply we are in a 732 * stmt */ 733 /* 734 * turn everything so far into a label 735 */ 736 { 737 int len = e_code - s_code; 738 739 CHECK_SIZE_LAB(len + 3); 740 memcpy(e_lab, s_code, len); 741 e_lab += len; 742 *e_lab++ = ':'; 743 *e_lab = '\0'; 744 e_code = s_code; 745 } 746 force_nl = ps.pcase = scase; /* ps.pcase will be used by 747 * dump_line to decide how to 748 * indent the label. force_nl 749 * will force a case n: to be 750 * on a line by itself */ 751 scase = false; 752 ps.want_blank = false; 753 break; 754 755 case semicolon: /* got a ';' */ 756 if (ps.dec_nest == 0) 757 ps.in_or_st = false;/* we are not in an initialization or 758 * structure declaration */ 759 scase = false; /* these will only need resetting in an error */ 760 squest = 0; 761 if (ps.last_token == rparen) 762 ps.in_parameter_declaration = 0; 763 ps.cast_mask = 0; 764 ps.not_cast_mask = 0; 765 ps.block_init = 0; 766 ps.block_init_level = 0; 767 ps.just_saw_decl--; 768 769 if (ps.in_decl && s_code == e_code && !ps.block_init && 770 !ps.dumped_decl_indent && ps.paren_level == 0) { 771 /* indent stray semicolons in declarations */ 772 indent_declaration(dec_ind - 1, tabs_to_var); 773 ps.dumped_decl_indent = true; 774 } 775 776 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 777 * structure declaration, we 778 * arent any more */ 779 780 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 781 782 /* 783 * This should be true iff there were unbalanced parens in the 784 * stmt. It is a bit complicated, because the semicolon might 785 * be in a for stmt 786 */ 787 diag2(1, "Unbalanced parens"); 788 ps.p_l_follow = 0; 789 if (sp_sw) { /* this is a check for an if, while, etc. with 790 * unbalanced parens */ 791 sp_sw = false; 792 parse(hd_type); /* dont lose the if, or whatever */ 793 } 794 } 795 *e_code++ = ';'; 796 ps.want_blank = true; 797 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 798 * middle of a stmt */ 799 800 if (!sp_sw) { /* if not if for (;;) */ 801 parse(semicolon); /* let parser know about end of stmt */ 802 force_nl = true;/* force newline after an end of stmt */ 803 } 804 break; 805 806 case lbrace: /* got a '{' */ 807 ps.in_stmt = false; /* dont indent the {} */ 808 if (!ps.block_init) 809 force_nl = true;/* force other stuff on same line as '{' onto 810 * new line */ 811 else if (ps.block_init_level <= 0) 812 ps.block_init_level = 1; 813 else 814 ps.block_init_level++; 815 816 if (s_code != e_code && !ps.block_init) { 817 if (!opt.btype_2) { 818 dump_line(); 819 ps.want_blank = false; 820 } 821 else if (ps.in_parameter_declaration && !ps.in_or_st) { 822 ps.i_l_follow = 0; 823 if (opt.function_brace_split) { /* dump the line prior 824 * to the brace ... */ 825 dump_line(); 826 ps.want_blank = false; 827 } else /* add a space between the decl and brace */ 828 ps.want_blank = true; 829 } 830 } 831 if (ps.in_parameter_declaration) 832 prefix_blankline_requested = 0; 833 834 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 835 * parens */ 836 diag2(1, "Unbalanced parens"); 837 ps.p_l_follow = 0; 838 if (sp_sw) { /* check for unclosed if, for, etc. */ 839 sp_sw = false; 840 parse(hd_type); 841 ps.ind_level = ps.i_l_follow; 842 } 843 } 844 if (s_code == e_code) 845 ps.ind_stmt = false; /* dont put extra indentation on line 846 * with '{' */ 847 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 848 * declaration or an init */ 849 di_stack[ps.dec_nest] = dec_ind; 850 if (++ps.dec_nest == nitems(di_stack)) { 851 diag3(0, "Reached internal limit of %d struct levels", 852 nitems(di_stack)); 853 ps.dec_nest--; 854 } 855 /* ? dec_ind = 0; */ 856 } 857 else { 858 ps.decl_on_line = false; /* we can't be in the middle of 859 * a declaration, so don't do 860 * special indentation of 861 * comments */ 862 if (opt.blanklines_after_declarations_at_proctop 863 && ps.in_parameter_declaration) 864 postfix_blankline_requested = 1; 865 ps.in_parameter_declaration = 0; 866 ps.in_decl = false; 867 } 868 dec_ind = 0; 869 parse(lbrace); /* let parser know about this */ 870 if (ps.want_blank) /* put a blank before '{' if '{' is not at 871 * start of line */ 872 *e_code++ = ' '; 873 ps.want_blank = false; 874 *e_code++ = '{'; 875 ps.just_saw_decl = 0; 876 break; 877 878 case rbrace: /* got a '}' */ 879 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 880 * omitted in 881 * declarations */ 882 parse(semicolon); 883 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 884 diag2(1, "Unbalanced parens"); 885 ps.p_l_follow = 0; 886 sp_sw = false; 887 } 888 ps.just_saw_decl = 0; 889 ps.block_init_level--; 890 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 891 * line */ 892 if (opt.verbose) 893 diag2(0, "Line broken"); 894 dump_line(); 895 } 896 *e_code++ = '}'; 897 ps.want_blank = true; 898 ps.in_stmt = ps.ind_stmt = false; 899 if (ps.dec_nest > 0) { /* we are in multi-level structure 900 * declaration */ 901 dec_ind = di_stack[--ps.dec_nest]; 902 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 903 ps.just_saw_decl = 2; 904 ps.in_decl = true; 905 } 906 prefix_blankline_requested = 0; 907 parse(rbrace); /* let parser know about this */ 908 ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead 909 && ps.il[ps.tos] >= ps.ind_level; 910 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0) 911 postfix_blankline_requested = 1; 912 break; 913 914 case swstmt: /* got keyword "switch" */ 915 sp_sw = true; 916 hd_type = swstmt; /* keep this for when we have seen the 917 * expression */ 918 goto copy_id; /* go move the token into buffer */ 919 920 case sp_paren: /* token is if, while, for */ 921 sp_sw = true; /* the interesting stuff is done after the 922 * expression is scanned */ 923 hd_type = (*token == 'i' ? ifstmt : 924 (*token == 'w' ? whilestmt : forstmt)); 925 926 /* 927 * remember the type of header for later use by parser 928 */ 929 goto copy_id; /* copy the token into line */ 930 931 case sp_nparen: /* got else, do */ 932 ps.in_stmt = false; 933 if (*token == 'e') { 934 if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) { 935 if (opt.verbose) 936 diag2(0, "Line broken"); 937 dump_line();/* make sure this starts a line */ 938 ps.want_blank = false; 939 } 940 force_nl = true;/* also, following stuff must go onto new line */ 941 last_else = 1; 942 parse(elselit); 943 } 944 else { 945 if (e_code != s_code) { /* make sure this starts a line */ 946 if (opt.verbose) 947 diag2(0, "Line broken"); 948 dump_line(); 949 ps.want_blank = false; 950 } 951 force_nl = true;/* also, following stuff must go onto new line */ 952 last_else = 0; 953 parse(dolit); 954 } 955 goto copy_id; /* move the token into line */ 956 957 case type_def: 958 case storage: 959 prefix_blankline_requested = 0; 960 goto copy_id; 961 962 case structure: 963 if (ps.p_l_follow > 0) 964 goto copy_id; 965 /* FALLTHROUGH */ 966 case decl: /* we have a declaration type (int, etc.) */ 967 parse(decl); /* let parser worry about indentation */ 968 if (ps.last_token == rparen && ps.tos <= 1) { 969 if (s_code != e_code) { 970 dump_line(); 971 ps.want_blank = 0; 972 } 973 } 974 if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) { 975 ps.ind_level = ps.i_l_follow = 1; 976 ps.ind_stmt = 0; 977 } 978 ps.in_or_st = true; /* this might be a structure or initialization 979 * declaration */ 980 ps.in_decl = ps.decl_on_line = ps.last_token != type_def; 981 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 982 ps.just_saw_decl = 2; 983 prefix_blankline_requested = 0; 984 for (i = 0; token[i++];); /* get length of token */ 985 986 if (ps.ind_level == 0 || ps.dec_nest > 0) { 987 /* global variable or struct member in local variable */ 988 dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i; 989 tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0); 990 } else { 991 /* local variable */ 992 dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i; 993 tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0); 994 } 995 goto copy_id; 996 997 case funcname: 998 case ident: /* got an identifier or constant */ 999 if (ps.in_decl) { 1000 if (type_code == funcname) { 1001 ps.in_decl = false; 1002 if (opt.procnames_start_line && s_code != e_code) { 1003 *e_code = '\0'; 1004 dump_line(); 1005 } 1006 else if (ps.want_blank) { 1007 *e_code++ = ' '; 1008 } 1009 ps.want_blank = false; 1010 } 1011 else if (!ps.block_init && !ps.dumped_decl_indent && 1012 ps.paren_level == 0) { /* if we are in a declaration, we 1013 * must indent identifier */ 1014 indent_declaration(dec_ind, tabs_to_var); 1015 ps.dumped_decl_indent = true; 1016 ps.want_blank = false; 1017 } 1018 } 1019 else if (sp_sw && ps.p_l_follow == 0) { 1020 sp_sw = false; 1021 force_nl = true; 1022 ps.last_u_d = true; 1023 ps.in_stmt = false; 1024 parse(hd_type); 1025 } 1026 copy_id: 1027 { 1028 int len = e_token - s_token; 1029 1030 CHECK_SIZE_CODE(len + 1); 1031 if (ps.want_blank) 1032 *e_code++ = ' '; 1033 memcpy(e_code, s_token, len); 1034 e_code += len; 1035 } 1036 if (type_code != funcname) 1037 ps.want_blank = true; 1038 break; 1039 1040 case strpfx: 1041 { 1042 int len = e_token - s_token; 1043 1044 CHECK_SIZE_CODE(len + 1); 1045 if (ps.want_blank) 1046 *e_code++ = ' '; 1047 memcpy(e_code, token, len); 1048 e_code += len; 1049 } 1050 ps.want_blank = false; 1051 break; 1052 1053 case period: /* treat a period kind of like a binary 1054 * operation */ 1055 *e_code++ = '.'; /* move the period into line */ 1056 ps.want_blank = false; /* dont put a blank after a period */ 1057 break; 1058 1059 case comma: 1060 ps.want_blank = (s_code != e_code); /* only put blank after comma 1061 * if comma does not start the 1062 * line */ 1063 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init && 1064 !ps.dumped_decl_indent && ps.paren_level == 0) { 1065 /* indent leading commas and not the actual identifiers */ 1066 indent_declaration(dec_ind - 1, tabs_to_var); 1067 ps.dumped_decl_indent = true; 1068 } 1069 *e_code++ = ','; 1070 if (ps.p_l_follow == 0) { 1071 if (ps.block_init_level <= 0) 1072 ps.block_init = 0; 1073 if (break_comma && (!opt.leave_comma || 1074 count_spaces_until(compute_code_target(), s_code, e_code) > 1075 opt.max_col - opt.tabsize)) 1076 force_nl = true; 1077 } 1078 break; 1079 1080 case preesc: /* got the character '#' */ 1081 if ((s_com != e_com) || 1082 (s_lab != e_lab) || 1083 (s_code != e_code)) 1084 dump_line(); 1085 CHECK_SIZE_LAB(1); 1086 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1087 { 1088 int in_comment = 0; 1089 int com_start = 0; 1090 char quote = 0; 1091 int com_end = 0; 1092 1093 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1094 buf_ptr++; 1095 if (buf_ptr >= buf_end) 1096 fill_buffer(); 1097 } 1098 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1099 CHECK_SIZE_LAB(2); 1100 *e_lab = *buf_ptr++; 1101 if (buf_ptr >= buf_end) 1102 fill_buffer(); 1103 switch (*e_lab++) { 1104 case BACKSLASH: 1105 if (!in_comment) { 1106 *e_lab++ = *buf_ptr++; 1107 if (buf_ptr >= buf_end) 1108 fill_buffer(); 1109 } 1110 break; 1111 case '/': 1112 if (*buf_ptr == '*' && !in_comment && !quote) { 1113 in_comment = 1; 1114 *e_lab++ = *buf_ptr++; 1115 com_start = e_lab - s_lab - 2; 1116 } 1117 break; 1118 case '"': 1119 if (quote == '"') 1120 quote = 0; 1121 break; 1122 case '\'': 1123 if (quote == '\'') 1124 quote = 0; 1125 break; 1126 case '*': 1127 if (*buf_ptr == '/' && in_comment) { 1128 in_comment = 0; 1129 *e_lab++ = *buf_ptr++; 1130 com_end = e_lab - s_lab; 1131 } 1132 break; 1133 } 1134 } 1135 1136 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1137 e_lab--; 1138 if (e_lab - s_lab == com_end && bp_save == NULL) { 1139 /* comment on preprocessor line */ 1140 if (sc_end == NULL) { /* if this is the first comment, 1141 * we must set up the buffer */ 1142 save_com = sc_buf; 1143 sc_end = &save_com[0]; 1144 } 1145 else { 1146 *sc_end++ = '\n'; /* add newline between 1147 * comments */ 1148 *sc_end++ = ' '; 1149 --line_no; 1150 } 1151 if (sc_end - save_com + com_end - com_start > sc_size) 1152 errx(1, "input too long"); 1153 memmove(sc_end, s_lab + com_start, com_end - com_start); 1154 sc_end += com_end - com_start; 1155 e_lab = s_lab + com_start; 1156 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1157 e_lab--; 1158 bp_save = buf_ptr; /* save current input buffer */ 1159 be_save = buf_end; 1160 buf_ptr = save_com; /* fix so that subsequent calls to 1161 * lexi will take tokens out of 1162 * save_com */ 1163 *sc_end++ = ' '; /* add trailing blank, just in case */ 1164 buf_end = sc_end; 1165 sc_end = NULL; 1166 } 1167 CHECK_SIZE_LAB(1); 1168 *e_lab = '\0'; /* null terminate line */ 1169 ps.pcase = false; 1170 } 1171 1172 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ 1173 if ((size_t)ifdef_level < nitems(state_stack)) { 1174 match_state[ifdef_level].tos = -1; 1175 state_stack[ifdef_level++] = ps; 1176 } 1177 else 1178 diag2(1, "#if stack overflow"); 1179 } 1180 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ 1181 if (ifdef_level <= 0) 1182 diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); 1183 else { 1184 match_state[ifdef_level - 1] = ps; 1185 ps = state_stack[ifdef_level - 1]; 1186 } 1187 } 1188 else if (strncmp(s_lab, "#endif", 6) == 0) { 1189 if (ifdef_level <= 0) 1190 diag2(1, "Unmatched #endif"); 1191 else 1192 ifdef_level--; 1193 } else { 1194 struct directives { 1195 int size; 1196 const char *string; 1197 } 1198 recognized[] = { 1199 {7, "include"}, 1200 {6, "define"}, 1201 {5, "undef"}, 1202 {4, "line"}, 1203 {5, "error"}, 1204 {6, "pragma"} 1205 }; 1206 int d = nitems(recognized); 1207 while (--d >= 0) 1208 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) 1209 break; 1210 if (d < 0) { 1211 diag2(1, "Unrecognized cpp directive"); 1212 break; 1213 } 1214 } 1215 if (opt.blanklines_around_conditional_compilation) { 1216 postfix_blankline_requested++; 1217 n_real_blanklines = 0; 1218 } 1219 else { 1220 postfix_blankline_requested = 0; 1221 prefix_blankline_requested = 0; 1222 } 1223 break; /* subsequent processing of the newline 1224 * character will cause the line to be printed */ 1225 1226 case comment: /* we have gotten a / followed by * this is a biggie */ 1227 pr_comment(); 1228 break; 1229 } /* end of big switch stmt */ 1230 1231 *e_code = '\0'; /* make sure code section is null terminated */ 1232 if (type_code != comment && type_code != newline && type_code != preesc) 1233 ps.last_token = type_code; 1234 } /* end of main while (1) loop */ 1235 } 1236 1237 /* 1238 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1239 * backup file will be ".Bfile" then make the backup file the input and 1240 * original input file the output 1241 */ 1242 static void 1243 bakcopy(void) 1244 { 1245 int n, 1246 bakchn; 1247 char buff[8 * 1024]; 1248 const char *p; 1249 1250 /* construct file name .Bfile */ 1251 for (p = in_name; *p; p++); /* skip to end of string */ 1252 while (p > in_name && *p != '/') /* find last '/' */ 1253 p--; 1254 if (*p == '/') 1255 p++; 1256 sprintf(bakfile, "%s%s", p, simple_backup_suffix); 1257 1258 /* copy in_name to backup file */ 1259 bakchn = creat(bakfile, 0600); 1260 if (bakchn < 0) 1261 err(1, "%s", bakfile); 1262 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 1263 if (write(bakchn, buff, n) != n) 1264 err(1, "%s", bakfile); 1265 if (n < 0) 1266 err(1, "%s", in_name); 1267 close(bakchn); 1268 fclose(input); 1269 1270 /* re-open backup file as the input file */ 1271 input = fopen(bakfile, "r"); 1272 if (input == NULL) 1273 err(1, "%s", bakfile); 1274 /* now the original input file will be the output */ 1275 output = fopen(in_name, "w"); 1276 if (output == NULL) { 1277 unlink(bakfile); 1278 err(1, "%s", in_name); 1279 } 1280 } 1281 1282 static void 1283 indent_declaration(int cur_dec_ind, int tabs_to_var) 1284 { 1285 int pos = e_code - s_code; 1286 char *startpos = e_code; 1287 1288 /* 1289 * get the tab math right for indentations that are not multiples of tabsize 1290 */ 1291 if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) { 1292 pos += (ps.ind_level * opt.ind_size) % opt.tabsize; 1293 cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize; 1294 } 1295 if (tabs_to_var) { 1296 int tpos; 1297 1298 CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize); 1299 while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) { 1300 *e_code++ = '\t'; 1301 pos = tpos; 1302 } 1303 } 1304 CHECK_SIZE_CODE(cur_dec_ind - pos + 1); 1305 while (pos < cur_dec_ind) { 1306 *e_code++ = ' '; 1307 pos++; 1308 } 1309 if (e_code == startpos && ps.want_blank) { 1310 *e_code++ = ' '; 1311 ps.want_blank = false; 1312 } 1313 } 1314