1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 6 * Copyright (c) 1980, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if 0 39 #ifndef lint 40 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 41 #endif /* not lint */ 42 #endif 43 44 #include <sys/cdefs.h> 45 #include <sys/param.h> 46 #include <sys/capsicum.h> 47 #include <capsicum_helpers.h> 48 #include <err.h> 49 #include <errno.h> 50 #include <fcntl.h> 51 #include <unistd.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <ctype.h> 56 #include "indent_globs.h" 57 #include "indent_codes.h" 58 #include "indent.h" 59 60 /* Globals */ 61 FILE *input, *output; 62 char *labbuf, *s_lab, *e_lab, *l_lab; 63 char *codebuf, *s_code, *e_code, *l_code; 64 char *combuf, *s_com, *e_com, *l_com; 65 char *tokenbuf, *s_token, *e_token, *l_token; 66 char *in_buffer, *in_buffer_limit; 67 char *buf_ptr, *buf_end; 68 69 char sc_buf[sc_size]; 70 71 char *save_com, *sc_end; 72 char *bp_save; 73 char *be_save; 74 75 struct options opt; 76 int line_no; 77 78 struct parser_state ps; 79 int ifdef_level; 80 struct parser_state state_stack[5]; 81 struct parser_state match_state[5]; 82 83 84 static void bakcopy(void); 85 static void indent_declaration(int, int); 86 87 const char *in_name = "Standard Input"; /* will always point to name of input 88 * file */ 89 const char *out_name = "Standard Output"; /* will always point to name 90 * of output file */ 91 const char *simple_backup_suffix = ".BAK"; /* Suffix to use for backup 92 * files */ 93 char bakfile[MAXPATHLEN] = ""; 94 95 int 96 main(int argc, char **argv) 97 { 98 cap_rights_t rights; 99 100 int dec_ind; /* current indentation for declarations */ 101 int di_stack[20]; /* a stack of structure indentation levels */ 102 int force_nl; /* when true, code must be broken */ 103 int hd_type = 0; /* used to store type of stmt for if (...), 104 * for (...), etc */ 105 int i; /* local loop counter */ 106 int scase; /* set to true when we see a case, so we will 107 * know what to do with the following colon */ 108 int sp_sw; /* when true, we are in the expression of 109 * if(...), while(...), etc. */ 110 int squest; /* when this is positive, we have seen a ? 111 * without the matching : in a <c>?<s>:<s> 112 * construct */ 113 const char *t_ptr; /* used for copying tokens */ 114 int tabs_to_var; /* true if using tabs to indent to var name */ 115 int type_code; /* the type of token, returned by lexi */ 116 117 int last_else = 0; /* true iff last keyword was an else */ 118 const char *profile_name = NULL; 119 const char *envval = NULL; 120 struct parser_state transient_state; /* a copy for lookup */ 121 122 /*-----------------------------------------------*\ 123 | INITIALIZATION | 124 \*-----------------------------------------------*/ 125 126 found_err = 0; 127 128 ps.p_stack[0] = stmt; /* this is the parser's stack */ 129 ps.last_nl = true; /* this is true if the last thing scanned was 130 * a newline */ 131 ps.last_token = semicolon; 132 combuf = (char *) malloc(bufsize); 133 if (combuf == NULL) 134 err(1, NULL); 135 labbuf = (char *) malloc(bufsize); 136 if (labbuf == NULL) 137 err(1, NULL); 138 codebuf = (char *) malloc(bufsize); 139 if (codebuf == NULL) 140 err(1, NULL); 141 tokenbuf = (char *) malloc(bufsize); 142 if (tokenbuf == NULL) 143 err(1, NULL); 144 alloc_typenames(); 145 init_constant_tt(); 146 l_com = combuf + bufsize - 5; 147 l_lab = labbuf + bufsize - 5; 148 l_code = codebuf + bufsize - 5; 149 l_token = tokenbuf + bufsize - 5; 150 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 151 * comment buffers */ 152 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 153 opt.else_if = 1; /* Default else-if special processing to on */ 154 s_lab = e_lab = labbuf + 1; 155 s_code = e_code = codebuf + 1; 156 s_com = e_com = combuf + 1; 157 s_token = e_token = tokenbuf + 1; 158 159 in_buffer = (char *) malloc(10); 160 if (in_buffer == NULL) 161 err(1, NULL); 162 in_buffer_limit = in_buffer + 8; 163 buf_ptr = buf_end = in_buffer; 164 line_no = 1; 165 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 166 sp_sw = force_nl = false; 167 ps.in_or_st = false; 168 ps.bl_line = true; 169 dec_ind = 0; 170 di_stack[ps.dec_nest = 0] = 0; 171 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 172 173 scase = ps.pcase = false; 174 squest = 0; 175 sc_end = NULL; 176 bp_save = NULL; 177 be_save = NULL; 178 179 output = NULL; 180 tabs_to_var = 0; 181 182 envval = getenv("SIMPLE_BACKUP_SUFFIX"); 183 if (envval) 184 simple_backup_suffix = envval; 185 186 /*--------------------------------------------------*\ 187 | COMMAND LINE SCAN | 188 \*--------------------------------------------------*/ 189 190 #ifdef undef 191 max_col = 78; /* -l78 */ 192 lineup_to_parens = 1; /* -lp */ 193 lineup_to_parens_always = 0; /* -nlpl */ 194 ps.ljust_decl = 0; /* -ndj */ 195 ps.com_ind = 33; /* -c33 */ 196 star_comment_cont = 1; /* -sc */ 197 ps.ind_size = 8; /* -i8 */ 198 verbose = 0; 199 ps.decl_indent = 16; /* -di16 */ 200 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value 201 * by an arg, we will set this equal to 202 * ps.decl_ind */ 203 ps.indent_parameters = 1; /* -ip */ 204 ps.decl_com_ind = 0; /* if this is not set to some positive value 205 * by an arg, we will set this equal to 206 * ps.com_ind */ 207 btype_2 = 1; /* -br */ 208 cuddle_else = 1; /* -ce */ 209 ps.unindent_displace = 0; /* -d0 */ 210 ps.case_indent = 0; /* -cli0 */ 211 format_block_comments = 1; /* -fcb */ 212 format_col1_comments = 1; /* -fc1 */ 213 procnames_start_line = 1; /* -psl */ 214 proc_calls_space = 0; /* -npcs */ 215 comment_delimiter_on_blankline = 1; /* -cdb */ 216 ps.leave_comma = 1; /* -nbc */ 217 #endif 218 219 for (i = 1; i < argc; ++i) 220 if (strcmp(argv[i], "-npro") == 0) 221 break; 222 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') 223 profile_name = argv[i]; /* non-empty -P (set profile) */ 224 set_defaults(); 225 if (i >= argc) 226 set_profile(profile_name); 227 228 for (i = 1; i < argc; ++i) { 229 230 /* 231 * look thru args (if any) for changes to defaults 232 */ 233 if (argv[i][0] != '-') {/* no flag on parameter */ 234 if (input == NULL) { /* we must have the input file */ 235 in_name = argv[i]; /* remember name of input file */ 236 input = fopen(in_name, "r"); 237 if (input == NULL) /* check for open error */ 238 err(1, "%s", in_name); 239 continue; 240 } 241 else if (output == NULL) { /* we have the output file */ 242 out_name = argv[i]; /* remember name of output file */ 243 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 244 * the file */ 245 errx(1, "input and output files must be different"); 246 } 247 output = fopen(out_name, "w"); 248 if (output == NULL) /* check for create error */ 249 err(1, "%s", out_name); 250 continue; 251 } 252 errx(1, "unknown parameter: %s", argv[i]); 253 } 254 else 255 set_option(argv[i]); 256 } /* end of for */ 257 if (input == NULL) 258 input = stdin; 259 if (output == NULL) { 260 if (input == stdin) 261 output = stdout; 262 else { 263 out_name = in_name; 264 bakcopy(); 265 } 266 } 267 268 /* Restrict input/output descriptors and enter Capsicum sandbox. */ 269 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); 270 if (caph_rights_limit(fileno(output), &rights) < 0) 271 err(EXIT_FAILURE, "unable to limit rights for %s", out_name); 272 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 273 if (caph_rights_limit(fileno(input), &rights) < 0) 274 err(EXIT_FAILURE, "unable to limit rights for %s", in_name); 275 if (caph_enter() < 0) 276 err(EXIT_FAILURE, "unable to enter capability mode"); 277 278 if (opt.com_ind <= 1) 279 opt.com_ind = 2; /* don't put normal comments before column 2 */ 280 if (opt.block_comment_max_col <= 0) 281 opt.block_comment_max_col = opt.max_col; 282 if (opt.local_decl_indent < 0) /* if not specified by user, set this */ 283 opt.local_decl_indent = opt.decl_indent; 284 if (opt.decl_com_ind <= 0) /* if not specified by user, set this */ 285 opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind; 286 if (opt.continuation_indent == 0) 287 opt.continuation_indent = opt.ind_size; 288 fill_buffer(); /* get first batch of stuff into input buffer */ 289 290 parse(semicolon); 291 { 292 char *p = buf_ptr; 293 int col = 1; 294 295 while (1) { 296 if (*p == ' ') 297 col++; 298 else if (*p == '\t') 299 col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1; 300 else 301 break; 302 p++; 303 } 304 if (col > opt.ind_size) 305 ps.ind_level = ps.i_l_follow = col / opt.ind_size; 306 } 307 308 /* 309 * START OF MAIN LOOP 310 */ 311 312 while (1) { /* this is the main loop. it will go until we 313 * reach eof */ 314 int comment_buffered = false; 315 316 type_code = lexi(&ps); /* lexi reads one token. The actual 317 * characters read are stored in "token". lexi 318 * returns a code indicating the type of token */ 319 320 /* 321 * The following code moves newlines and comments following an if (), 322 * while (), else, etc. up to the start of the following stmt to 323 * a buffer. This allows proper handling of both kinds of brace 324 * placement (-br, -bl) and cuddling "else" (-ce). 325 */ 326 327 while (ps.search_brace) { 328 switch (type_code) { 329 case newline: 330 if (sc_end == NULL) { 331 save_com = sc_buf; 332 save_com[0] = save_com[1] = ' '; 333 sc_end = &save_com[2]; 334 } 335 *sc_end++ = '\n'; 336 /* 337 * We may have inherited a force_nl == true from the previous 338 * token (like a semicolon). But once we know that a newline 339 * has been scanned in this loop, force_nl should be false. 340 * 341 * However, the force_nl == true must be preserved if newline 342 * is never scanned in this loop, so this assignment cannot be 343 * done earlier. 344 */ 345 force_nl = false; 346 case form_feed: 347 break; 348 case comment: 349 if (sc_end == NULL) { 350 /* 351 * Copy everything from the start of the line, because 352 * pr_comment() will use that to calculate original 353 * indentation of a boxed comment. 354 */ 355 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4); 356 save_com = sc_buf + (buf_ptr - in_buffer - 4); 357 save_com[0] = save_com[1] = ' '; 358 sc_end = &save_com[2]; 359 } 360 comment_buffered = true; 361 *sc_end++ = '/'; /* copy in start of comment */ 362 *sc_end++ = '*'; 363 for (;;) { /* loop until we get to the end of the comment */ 364 *sc_end = *buf_ptr++; 365 if (buf_ptr >= buf_end) 366 fill_buffer(); 367 if (*sc_end++ == '*' && *buf_ptr == '/') 368 break; /* we are at end of comment */ 369 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer 370 * overflow */ 371 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); 372 fflush(output); 373 exit(1); 374 } 375 } 376 *sc_end++ = '/'; /* add ending slash */ 377 if (++buf_ptr >= buf_end) /* get past / in buffer */ 378 fill_buffer(); 379 break; 380 case lbrace: 381 /* 382 * Put KNF-style lbraces before the buffered up tokens and 383 * jump out of this loop in order to avoid copying the token 384 * again under the default case of the switch below. 385 */ 386 if (sc_end != NULL && opt.btype_2) { 387 save_com[0] = '{'; 388 /* 389 * Originally the lbrace may have been alone on its own 390 * line, but it will be moved into "the else's line", so 391 * if there was a newline resulting from the "{" before, 392 * it must be scanned now and ignored. 393 */ 394 while (isspace((unsigned char)*buf_ptr)) { 395 if (++buf_ptr >= buf_end) 396 fill_buffer(); 397 if (*buf_ptr == '\n') 398 break; 399 } 400 goto sw_buffer; 401 } 402 /* FALLTHROUGH */ 403 default: /* it is the start of a normal statement */ 404 { 405 int remove_newlines; 406 407 remove_newlines = 408 /* "} else" */ 409 (type_code == sp_nparen && *token == 'e' && 410 e_code != s_code && e_code[-1] == '}') 411 /* "else if" */ 412 || (type_code == sp_paren && *token == 'i' && 413 last_else && opt.else_if); 414 if (remove_newlines) 415 force_nl = false; 416 if (sc_end == NULL) { /* ignore buffering if 417 * comment wasn't saved up */ 418 ps.search_brace = false; 419 goto check_type; 420 } 421 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) { 422 sc_end--; 423 } 424 if (opt.swallow_optional_blanklines || 425 (!comment_buffered && remove_newlines)) { 426 force_nl = !remove_newlines; 427 while (sc_end > save_com && sc_end[-1] == '\n') { 428 sc_end--; 429 } 430 } 431 if (force_nl) { /* if we should insert a nl here, put 432 * it into the buffer */ 433 force_nl = false; 434 --line_no; /* this will be re-increased when the 435 * newline is read from the buffer */ 436 *sc_end++ = '\n'; 437 *sc_end++ = ' '; 438 if (opt.verbose) /* print error msg if the line was 439 * not already broken */ 440 diag2(0, "Line broken"); 441 } 442 for (t_ptr = token; *t_ptr; ++t_ptr) 443 *sc_end++ = *t_ptr; 444 445 sw_buffer: 446 ps.search_brace = false; /* stop looking for start of 447 * stmt */ 448 bp_save = buf_ptr; /* save current input buffer */ 449 be_save = buf_end; 450 buf_ptr = save_com; /* fix so that subsequent calls to 451 * lexi will take tokens out of 452 * save_com */ 453 *sc_end++ = ' ';/* add trailing blank, just in case */ 454 buf_end = sc_end; 455 sc_end = NULL; 456 break; 457 } 458 } /* end of switch */ 459 /* 460 * We must make this check, just in case there was an unexpected 461 * EOF. 462 */ 463 if (type_code != 0) { 464 /* 465 * The only intended purpose of calling lexi() below is to 466 * categorize the next token in order to decide whether to 467 * continue buffering forthcoming tokens. Once the buffering 468 * is over, lexi() will be called again elsewhere on all of 469 * the tokens - this time for normal processing. 470 * 471 * Calling it for this purpose is a bug, because lexi() also 472 * changes the parser state and discards leading whitespace, 473 * which is needed mostly for comment-related considerations. 474 * 475 * Work around the former problem by giving lexi() a copy of 476 * the current parser state and discard it if the call turned 477 * out to be just a look ahead. 478 * 479 * Work around the latter problem by copying all whitespace 480 * characters into the buffer so that the later lexi() call 481 * will read them. 482 */ 483 if (sc_end != NULL) { 484 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 485 *sc_end++ = *buf_ptr++; 486 if (sc_end >= &save_com[sc_size]) { 487 errx(1, "input too long"); 488 } 489 } 490 if (buf_ptr >= buf_end) { 491 fill_buffer(); 492 } 493 } 494 transient_state = ps; 495 type_code = lexi(&transient_state); /* read another token */ 496 if (type_code != newline && type_code != form_feed && 497 type_code != comment && !transient_state.search_brace) { 498 ps = transient_state; 499 } 500 } 501 } /* end of while (search_brace) */ 502 last_else = 0; 503 check_type: 504 if (type_code == 0) { /* we got eof */ 505 if (s_lab != e_lab || s_code != e_code 506 || s_com != e_com) /* must dump end of line */ 507 dump_line(); 508 if (ps.tos > 1) /* check for balanced braces */ 509 diag2(1, "Stuff missing from end of file"); 510 511 if (opt.verbose) { 512 printf("There were %d output lines and %d comments\n", 513 ps.out_lines, ps.out_coms); 514 printf("(Lines with comments)/(Lines with code): %6.3f\n", 515 (1.0 * ps.com_lines) / code_lines); 516 } 517 fflush(output); 518 exit(found_err); 519 } 520 if ( 521 (type_code != comment) && 522 (type_code != newline) && 523 (type_code != preesc) && 524 (type_code != form_feed)) { 525 if (force_nl && 526 (type_code != semicolon) && 527 (type_code != lbrace || !opt.btype_2)) { 528 /* we should force a broken line here */ 529 if (opt.verbose) 530 diag2(0, "Line broken"); 531 dump_line(); 532 ps.want_blank = false; /* dont insert blank at line start */ 533 force_nl = false; 534 } 535 ps.in_stmt = true; /* turn on flag which causes an extra level of 536 * indentation. this is turned off by a ; or 537 * '}' */ 538 if (s_com != e_com) { /* the turkey has embedded a comment 539 * in a line. fix it */ 540 int len = e_com - s_com; 541 542 CHECK_SIZE_CODE(len + 3); 543 *e_code++ = ' '; 544 memcpy(e_code, s_com, len); 545 e_code += len; 546 *e_code++ = ' '; 547 *e_code = '\0'; /* null terminate code sect */ 548 ps.want_blank = false; 549 e_com = s_com; 550 } 551 } 552 else if (type_code != comment) /* preserve force_nl thru a comment */ 553 force_nl = false; /* cancel forced newline after newline, form 554 * feed, etc */ 555 556 557 558 /*-----------------------------------------------------*\ 559 | do switch on type of token scanned | 560 \*-----------------------------------------------------*/ 561 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code 562 * before the next CHECK_SIZE_CODE or 563 * dump_line() is 2. After that there's the 564 * final increment for the null character. */ 565 switch (type_code) { /* now, decide what to do with the token */ 566 567 case form_feed: /* found a form feed in line */ 568 ps.use_ff = true; /* a form feed is treated much like a newline */ 569 dump_line(); 570 ps.want_blank = false; 571 break; 572 573 case newline: 574 if (ps.last_token != comma || ps.p_l_follow > 0 575 || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 576 dump_line(); 577 ps.want_blank = false; 578 } 579 ++line_no; /* keep track of input line number */ 580 break; 581 582 case lparen: /* got a '(' or '[' */ 583 /* count parens to make Healy happy */ 584 if (++ps.p_l_follow == nitems(ps.paren_indents)) { 585 diag3(0, "Reached internal limit of %d unclosed parens", 586 nitems(ps.paren_indents)); 587 ps.p_l_follow--; 588 } 589 if (*token == '[') 590 /* not a function pointer declaration or a function call */; 591 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && 592 ps.procname[0] == '\0' && ps.paren_level == 0) { 593 /* function pointer declarations */ 594 indent_declaration(dec_ind, tabs_to_var); 595 ps.dumped_decl_indent = true; 596 } 597 else if (ps.want_blank && 598 ((ps.last_token != ident && ps.last_token != funcname) || 599 opt.proc_calls_space || 600 /* offsetof (1) is never allowed a space; sizeof (2) gets 601 * one iff -bs; all other keywords (>2) always get a space 602 * before lparen */ 603 ps.keyword + opt.Bill_Shannon > 2)) 604 *e_code++ = ' '; 605 ps.want_blank = false; 606 *e_code++ = token[0]; 607 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1; 608 if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent 609 && ps.paren_indents[0] < 2 * opt.ind_size) 610 ps.paren_indents[0] = 2 * opt.ind_size; 611 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 612 /* 613 * this is a kluge to make sure that declarations will be 614 * aligned right if proc decl has an explicit type on it, i.e. 615 * "int a(x) {..." 616 */ 617 parse(semicolon); /* I said this was a kluge... */ 618 ps.in_or_st = false; /* turn off flag for structure decl or 619 * initialization */ 620 } 621 /* parenthesized type following sizeof or offsetof is not a cast */ 622 if (ps.keyword == 1 || ps.keyword == 2) 623 ps.not_cast_mask |= 1 << ps.p_l_follow; 624 break; 625 626 case rparen: /* got a ')' or ']' */ 627 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { 628 ps.last_u_d = true; 629 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 630 ps.want_blank = opt.space_after_cast; 631 } else 632 ps.want_blank = true; 633 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; 634 if (--ps.p_l_follow < 0) { 635 ps.p_l_follow = 0; 636 diag3(0, "Extra %c", *token); 637 } 638 if (e_code == s_code) /* if the paren starts the line */ 639 ps.paren_level = ps.p_l_follow; /* then indent it */ 640 641 *e_code++ = token[0]; 642 643 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 644 * (...), or some such */ 645 sp_sw = false; 646 force_nl = true;/* must force newline after if */ 647 ps.last_u_d = true; /* inform lexi that a following 648 * operator is unary */ 649 ps.in_stmt = false; /* dont use stmt continuation 650 * indentation */ 651 652 parse(hd_type); /* let parser worry about if, or whatever */ 653 } 654 ps.search_brace = opt.btype_2; /* this should ensure that 655 * constructs such as main(){...} 656 * and int[]{...} have their braces 657 * put in the right place */ 658 break; 659 660 case unary_op: /* this could be any unary operation */ 661 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init && 662 ps.procname[0] == '\0' && ps.paren_level == 0) { 663 /* pointer declarations */ 664 665 /* 666 * if this is a unary op in a declaration, we should indent 667 * this token 668 */ 669 for (i = 0; token[i]; ++i) 670 /* find length of token */; 671 indent_declaration(dec_ind - i, tabs_to_var); 672 ps.dumped_decl_indent = true; 673 } 674 else if (ps.want_blank) 675 *e_code++ = ' '; 676 677 { 678 int len = e_token - s_token; 679 680 CHECK_SIZE_CODE(len); 681 memcpy(e_code, token, len); 682 e_code += len; 683 } 684 ps.want_blank = false; 685 break; 686 687 case binary_op: /* any binary operation */ 688 { 689 int len = e_token - s_token; 690 691 CHECK_SIZE_CODE(len + 1); 692 if (ps.want_blank) 693 *e_code++ = ' '; 694 memcpy(e_code, token, len); 695 e_code += len; 696 } 697 ps.want_blank = true; 698 break; 699 700 case postop: /* got a trailing ++ or -- */ 701 *e_code++ = token[0]; 702 *e_code++ = token[1]; 703 ps.want_blank = true; 704 break; 705 706 case question: /* got a ? */ 707 squest++; /* this will be used when a later colon 708 * appears so we can distinguish the 709 * <c>?<n>:<n> construct */ 710 if (ps.want_blank) 711 *e_code++ = ' '; 712 *e_code++ = '?'; 713 ps.want_blank = true; 714 break; 715 716 case casestmt: /* got word 'case' or 'default' */ 717 scase = true; /* so we can process the later colon properly */ 718 goto copy_id; 719 720 case colon: /* got a ':' */ 721 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 722 --squest; 723 if (ps.want_blank) 724 *e_code++ = ' '; 725 *e_code++ = ':'; 726 ps.want_blank = true; 727 break; 728 } 729 if (ps.in_or_st) { 730 *e_code++ = ':'; 731 ps.want_blank = false; 732 break; 733 } 734 ps.in_stmt = false; /* seeing a label does not imply we are in a 735 * stmt */ 736 /* 737 * turn everything so far into a label 738 */ 739 { 740 int len = e_code - s_code; 741 742 CHECK_SIZE_LAB(len + 3); 743 memcpy(e_lab, s_code, len); 744 e_lab += len; 745 *e_lab++ = ':'; 746 *e_lab = '\0'; 747 e_code = s_code; 748 } 749 force_nl = ps.pcase = scase; /* ps.pcase will be used by 750 * dump_line to decide how to 751 * indent the label. force_nl 752 * will force a case n: to be 753 * on a line by itself */ 754 scase = false; 755 ps.want_blank = false; 756 break; 757 758 case semicolon: /* got a ';' */ 759 if (ps.dec_nest == 0) 760 ps.in_or_st = false;/* we are not in an initialization or 761 * structure declaration */ 762 scase = false; /* these will only need resetting in an error */ 763 squest = 0; 764 if (ps.last_token == rparen) 765 ps.in_parameter_declaration = 0; 766 ps.cast_mask = 0; 767 ps.not_cast_mask = 0; 768 ps.block_init = 0; 769 ps.block_init_level = 0; 770 ps.just_saw_decl--; 771 772 if (ps.in_decl && s_code == e_code && !ps.block_init && 773 !ps.dumped_decl_indent && ps.paren_level == 0) { 774 /* indent stray semicolons in declarations */ 775 indent_declaration(dec_ind - 1, tabs_to_var); 776 ps.dumped_decl_indent = true; 777 } 778 779 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 780 * structure declaration, we 781 * arent any more */ 782 783 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 784 785 /* 786 * This should be true iff there were unbalanced parens in the 787 * stmt. It is a bit complicated, because the semicolon might 788 * be in a for stmt 789 */ 790 diag2(1, "Unbalanced parens"); 791 ps.p_l_follow = 0; 792 if (sp_sw) { /* this is a check for an if, while, etc. with 793 * unbalanced parens */ 794 sp_sw = false; 795 parse(hd_type); /* dont lose the if, or whatever */ 796 } 797 } 798 *e_code++ = ';'; 799 ps.want_blank = true; 800 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 801 * middle of a stmt */ 802 803 if (!sp_sw) { /* if not if for (;;) */ 804 parse(semicolon); /* let parser know about end of stmt */ 805 force_nl = true;/* force newline after an end of stmt */ 806 } 807 break; 808 809 case lbrace: /* got a '{' */ 810 ps.in_stmt = false; /* dont indent the {} */ 811 if (!ps.block_init) 812 force_nl = true;/* force other stuff on same line as '{' onto 813 * new line */ 814 else if (ps.block_init_level <= 0) 815 ps.block_init_level = 1; 816 else 817 ps.block_init_level++; 818 819 if (s_code != e_code && !ps.block_init) { 820 if (!opt.btype_2) { 821 dump_line(); 822 ps.want_blank = false; 823 } 824 else if (ps.in_parameter_declaration && !ps.in_or_st) { 825 ps.i_l_follow = 0; 826 if (opt.function_brace_split) { /* dump the line prior 827 * to the brace ... */ 828 dump_line(); 829 ps.want_blank = false; 830 } else /* add a space between the decl and brace */ 831 ps.want_blank = true; 832 } 833 } 834 if (ps.in_parameter_declaration) 835 prefix_blankline_requested = 0; 836 837 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 838 * parens */ 839 diag2(1, "Unbalanced parens"); 840 ps.p_l_follow = 0; 841 if (sp_sw) { /* check for unclosed if, for, etc. */ 842 sp_sw = false; 843 parse(hd_type); 844 ps.ind_level = ps.i_l_follow; 845 } 846 } 847 if (s_code == e_code) 848 ps.ind_stmt = false; /* dont put extra indentation on line 849 * with '{' */ 850 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 851 * declaration or an init */ 852 di_stack[ps.dec_nest] = dec_ind; 853 if (++ps.dec_nest == nitems(di_stack)) { 854 diag3(0, "Reached internal limit of %d struct levels", 855 nitems(di_stack)); 856 ps.dec_nest--; 857 } 858 /* ? dec_ind = 0; */ 859 } 860 else { 861 ps.decl_on_line = false; /* we can't be in the middle of 862 * a declaration, so don't do 863 * special indentation of 864 * comments */ 865 if (opt.blanklines_after_declarations_at_proctop 866 && ps.in_parameter_declaration) 867 postfix_blankline_requested = 1; 868 ps.in_parameter_declaration = 0; 869 ps.in_decl = false; 870 } 871 dec_ind = 0; 872 parse(lbrace); /* let parser know about this */ 873 if (ps.want_blank) /* put a blank before '{' if '{' is not at 874 * start of line */ 875 *e_code++ = ' '; 876 ps.want_blank = false; 877 *e_code++ = '{'; 878 ps.just_saw_decl = 0; 879 break; 880 881 case rbrace: /* got a '}' */ 882 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 883 * omitted in 884 * declarations */ 885 parse(semicolon); 886 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 887 diag2(1, "Unbalanced parens"); 888 ps.p_l_follow = 0; 889 sp_sw = false; 890 } 891 ps.just_saw_decl = 0; 892 ps.block_init_level--; 893 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 894 * line */ 895 if (opt.verbose) 896 diag2(0, "Line broken"); 897 dump_line(); 898 } 899 *e_code++ = '}'; 900 ps.want_blank = true; 901 ps.in_stmt = ps.ind_stmt = false; 902 if (ps.dec_nest > 0) { /* we are in multi-level structure 903 * declaration */ 904 dec_ind = di_stack[--ps.dec_nest]; 905 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 906 ps.just_saw_decl = 2; 907 ps.in_decl = true; 908 } 909 prefix_blankline_requested = 0; 910 parse(rbrace); /* let parser know about this */ 911 ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead 912 && ps.il[ps.tos] >= ps.ind_level; 913 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0) 914 postfix_blankline_requested = 1; 915 break; 916 917 case swstmt: /* got keyword "switch" */ 918 sp_sw = true; 919 hd_type = swstmt; /* keep this for when we have seen the 920 * expression */ 921 goto copy_id; /* go move the token into buffer */ 922 923 case sp_paren: /* token is if, while, for */ 924 sp_sw = true; /* the interesting stuff is done after the 925 * expression is scanned */ 926 hd_type = (*token == 'i' ? ifstmt : 927 (*token == 'w' ? whilestmt : forstmt)); 928 929 /* 930 * remember the type of header for later use by parser 931 */ 932 goto copy_id; /* copy the token into line */ 933 934 case sp_nparen: /* got else, do */ 935 ps.in_stmt = false; 936 if (*token == 'e') { 937 if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) { 938 if (opt.verbose) 939 diag2(0, "Line broken"); 940 dump_line();/* make sure this starts a line */ 941 ps.want_blank = false; 942 } 943 force_nl = true;/* also, following stuff must go onto new line */ 944 last_else = 1; 945 parse(elselit); 946 } 947 else { 948 if (e_code != s_code) { /* make sure this starts a line */ 949 if (opt.verbose) 950 diag2(0, "Line broken"); 951 dump_line(); 952 ps.want_blank = false; 953 } 954 force_nl = true;/* also, following stuff must go onto new line */ 955 last_else = 0; 956 parse(dolit); 957 } 958 goto copy_id; /* move the token into line */ 959 960 case type_def: 961 case storage: 962 prefix_blankline_requested = 0; 963 goto copy_id; 964 965 case structure: 966 if (ps.p_l_follow > 0) 967 goto copy_id; 968 /* FALLTHROUGH */ 969 case decl: /* we have a declaration type (int, etc.) */ 970 parse(decl); /* let parser worry about indentation */ 971 if (ps.last_token == rparen && ps.tos <= 1) { 972 if (s_code != e_code) { 973 dump_line(); 974 ps.want_blank = 0; 975 } 976 } 977 if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) { 978 ps.ind_level = ps.i_l_follow = 1; 979 ps.ind_stmt = 0; 980 } 981 ps.in_or_st = true; /* this might be a structure or initialization 982 * declaration */ 983 ps.in_decl = ps.decl_on_line = ps.last_token != type_def; 984 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 985 ps.just_saw_decl = 2; 986 prefix_blankline_requested = 0; 987 for (i = 0; token[i++];); /* get length of token */ 988 989 if (ps.ind_level == 0 || ps.dec_nest > 0) { 990 /* global variable or struct member in local variable */ 991 dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i; 992 tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0); 993 } else { 994 /* local variable */ 995 dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i; 996 tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0); 997 } 998 goto copy_id; 999 1000 case funcname: 1001 case ident: /* got an identifier or constant */ 1002 if (ps.in_decl) { 1003 if (type_code == funcname) { 1004 ps.in_decl = false; 1005 if (opt.procnames_start_line && s_code != e_code) { 1006 *e_code = '\0'; 1007 dump_line(); 1008 } 1009 else if (ps.want_blank) { 1010 *e_code++ = ' '; 1011 } 1012 ps.want_blank = false; 1013 } 1014 else if (!ps.block_init && !ps.dumped_decl_indent && 1015 ps.paren_level == 0) { /* if we are in a declaration, we 1016 * must indent identifier */ 1017 indent_declaration(dec_ind, tabs_to_var); 1018 ps.dumped_decl_indent = true; 1019 ps.want_blank = false; 1020 } 1021 } 1022 else if (sp_sw && ps.p_l_follow == 0) { 1023 sp_sw = false; 1024 force_nl = true; 1025 ps.last_u_d = true; 1026 ps.in_stmt = false; 1027 parse(hd_type); 1028 } 1029 copy_id: 1030 { 1031 int len = e_token - s_token; 1032 1033 CHECK_SIZE_CODE(len + 1); 1034 if (ps.want_blank) 1035 *e_code++ = ' '; 1036 memcpy(e_code, s_token, len); 1037 e_code += len; 1038 } 1039 if (type_code != funcname) 1040 ps.want_blank = true; 1041 break; 1042 1043 case strpfx: 1044 { 1045 int len = e_token - s_token; 1046 1047 CHECK_SIZE_CODE(len + 1); 1048 if (ps.want_blank) 1049 *e_code++ = ' '; 1050 memcpy(e_code, token, len); 1051 e_code += len; 1052 } 1053 ps.want_blank = false; 1054 break; 1055 1056 case period: /* treat a period kind of like a binary 1057 * operation */ 1058 *e_code++ = '.'; /* move the period into line */ 1059 ps.want_blank = false; /* dont put a blank after a period */ 1060 break; 1061 1062 case comma: 1063 ps.want_blank = (s_code != e_code); /* only put blank after comma 1064 * if comma does not start the 1065 * line */ 1066 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init && 1067 !ps.dumped_decl_indent && ps.paren_level == 0) { 1068 /* indent leading commas and not the actual identifiers */ 1069 indent_declaration(dec_ind - 1, tabs_to_var); 1070 ps.dumped_decl_indent = true; 1071 } 1072 *e_code++ = ','; 1073 if (ps.p_l_follow == 0) { 1074 if (ps.block_init_level <= 0) 1075 ps.block_init = 0; 1076 if (break_comma && (!opt.leave_comma || 1077 count_spaces_until(compute_code_target(), s_code, e_code) > 1078 opt.max_col - opt.tabsize)) 1079 force_nl = true; 1080 } 1081 break; 1082 1083 case preesc: /* got the character '#' */ 1084 if ((s_com != e_com) || 1085 (s_lab != e_lab) || 1086 (s_code != e_code)) 1087 dump_line(); 1088 CHECK_SIZE_LAB(1); 1089 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1090 { 1091 int in_comment = 0; 1092 int com_start = 0; 1093 char quote = 0; 1094 int com_end = 0; 1095 1096 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1097 buf_ptr++; 1098 if (buf_ptr >= buf_end) 1099 fill_buffer(); 1100 } 1101 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1102 CHECK_SIZE_LAB(2); 1103 *e_lab = *buf_ptr++; 1104 if (buf_ptr >= buf_end) 1105 fill_buffer(); 1106 switch (*e_lab++) { 1107 case BACKSLASH: 1108 if (!in_comment) { 1109 *e_lab++ = *buf_ptr++; 1110 if (buf_ptr >= buf_end) 1111 fill_buffer(); 1112 } 1113 break; 1114 case '/': 1115 if (*buf_ptr == '*' && !in_comment && !quote) { 1116 in_comment = 1; 1117 *e_lab++ = *buf_ptr++; 1118 com_start = e_lab - s_lab - 2; 1119 } 1120 break; 1121 case '"': 1122 if (quote == '"') 1123 quote = 0; 1124 break; 1125 case '\'': 1126 if (quote == '\'') 1127 quote = 0; 1128 break; 1129 case '*': 1130 if (*buf_ptr == '/' && in_comment) { 1131 in_comment = 0; 1132 *e_lab++ = *buf_ptr++; 1133 com_end = e_lab - s_lab; 1134 } 1135 break; 1136 } 1137 } 1138 1139 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1140 e_lab--; 1141 if (e_lab - s_lab == com_end && bp_save == NULL) { 1142 /* comment on preprocessor line */ 1143 if (sc_end == NULL) { /* if this is the first comment, 1144 * we must set up the buffer */ 1145 save_com = sc_buf; 1146 sc_end = &save_com[0]; 1147 } 1148 else { 1149 *sc_end++ = '\n'; /* add newline between 1150 * comments */ 1151 *sc_end++ = ' '; 1152 --line_no; 1153 } 1154 if (sc_end - save_com + com_end - com_start > sc_size) 1155 errx(1, "input too long"); 1156 memmove(sc_end, s_lab + com_start, com_end - com_start); 1157 sc_end += com_end - com_start; 1158 e_lab = s_lab + com_start; 1159 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1160 e_lab--; 1161 bp_save = buf_ptr; /* save current input buffer */ 1162 be_save = buf_end; 1163 buf_ptr = save_com; /* fix so that subsequent calls to 1164 * lexi will take tokens out of 1165 * save_com */ 1166 *sc_end++ = ' '; /* add trailing blank, just in case */ 1167 buf_end = sc_end; 1168 sc_end = NULL; 1169 } 1170 CHECK_SIZE_LAB(1); 1171 *e_lab = '\0'; /* null terminate line */ 1172 ps.pcase = false; 1173 } 1174 1175 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ 1176 if ((size_t)ifdef_level < nitems(state_stack)) { 1177 match_state[ifdef_level].tos = -1; 1178 state_stack[ifdef_level++] = ps; 1179 } 1180 else 1181 diag2(1, "#if stack overflow"); 1182 } 1183 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ 1184 if (ifdef_level <= 0) 1185 diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); 1186 else { 1187 match_state[ifdef_level - 1] = ps; 1188 ps = state_stack[ifdef_level - 1]; 1189 } 1190 } 1191 else if (strncmp(s_lab, "#endif", 6) == 0) { 1192 if (ifdef_level <= 0) 1193 diag2(1, "Unmatched #endif"); 1194 else 1195 ifdef_level--; 1196 } else { 1197 struct directives { 1198 int size; 1199 const char *string; 1200 } 1201 recognized[] = { 1202 {7, "include"}, 1203 {6, "define"}, 1204 {5, "undef"}, 1205 {4, "line"}, 1206 {5, "error"}, 1207 {6, "pragma"} 1208 }; 1209 int d = nitems(recognized); 1210 while (--d >= 0) 1211 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) 1212 break; 1213 if (d < 0) { 1214 diag2(1, "Unrecognized cpp directive"); 1215 break; 1216 } 1217 } 1218 if (opt.blanklines_around_conditional_compilation) { 1219 postfix_blankline_requested++; 1220 n_real_blanklines = 0; 1221 } 1222 else { 1223 postfix_blankline_requested = 0; 1224 prefix_blankline_requested = 0; 1225 } 1226 break; /* subsequent processing of the newline 1227 * character will cause the line to be printed */ 1228 1229 case comment: /* we have gotten a / followed by * this is a biggie */ 1230 pr_comment(); 1231 break; 1232 } /* end of big switch stmt */ 1233 1234 *e_code = '\0'; /* make sure code section is null terminated */ 1235 if (type_code != comment && type_code != newline && type_code != preesc) 1236 ps.last_token = type_code; 1237 } /* end of main while (1) loop */ 1238 } 1239 1240 /* 1241 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1242 * backup file will be ".Bfile" then make the backup file the input and 1243 * original input file the output 1244 */ 1245 static void 1246 bakcopy(void) 1247 { 1248 int n, 1249 bakchn; 1250 char buff[8 * 1024]; 1251 const char *p; 1252 1253 /* construct file name .Bfile */ 1254 for (p = in_name; *p; p++); /* skip to end of string */ 1255 while (p > in_name && *p != '/') /* find last '/' */ 1256 p--; 1257 if (*p == '/') 1258 p++; 1259 sprintf(bakfile, "%s%s", p, simple_backup_suffix); 1260 1261 /* copy in_name to backup file */ 1262 bakchn = creat(bakfile, 0600); 1263 if (bakchn < 0) 1264 err(1, "%s", bakfile); 1265 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 1266 if (write(bakchn, buff, n) != n) 1267 err(1, "%s", bakfile); 1268 if (n < 0) 1269 err(1, "%s", in_name); 1270 close(bakchn); 1271 fclose(input); 1272 1273 /* re-open backup file as the input file */ 1274 input = fopen(bakfile, "r"); 1275 if (input == NULL) 1276 err(1, "%s", bakfile); 1277 /* now the original input file will be the output */ 1278 output = fopen(in_name, "w"); 1279 if (output == NULL) { 1280 unlink(bakfile); 1281 err(1, "%s", in_name); 1282 } 1283 } 1284 1285 static void 1286 indent_declaration(int cur_dec_ind, int tabs_to_var) 1287 { 1288 int pos = e_code - s_code; 1289 char *startpos = e_code; 1290 1291 /* 1292 * get the tab math right for indentations that are not multiples of tabsize 1293 */ 1294 if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) { 1295 pos += (ps.ind_level * opt.ind_size) % opt.tabsize; 1296 cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize; 1297 } 1298 if (tabs_to_var) { 1299 int tpos; 1300 1301 CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize); 1302 while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) { 1303 *e_code++ = '\t'; 1304 pos = tpos; 1305 } 1306 } 1307 CHECK_SIZE_CODE(cur_dec_ind - pos + 1); 1308 while (pos < cur_dec_ind) { 1309 *e_code++ = ' '; 1310 pos++; 1311 } 1312 if (e_code == startpos && ps.want_blank) { 1313 *e_code++ = ' '; 1314 ps.want_blank = false; 1315 } 1316 } 1317