/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if 0
#ifndef lint
static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93";
#endif /* not lint */
#endif

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/capsicum.h>
#include <capsicum_helpers.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "indent_globs.h"
#include "indent_codes.h"
#include "indent.h"

/*
 * Globals
 *
 * These objects are defined here and initialised at the top of main().
 */

/*
 * The stream being read and the stream being written.  main() opens them
 * with fopen() from the non-option command line arguments; when no input
 * file is named it uses stdin, and in that case stdout for the output.
 */
FILE       *input, *output;

/*
 * Output line buffers.  Each group is: the allocation itself, the start of
 * the text held in it, the position one past the last character stored, and
 * a limit pointer.  main() allocates each buffer with malloc(bufsize), sets
 * the s_ and e_ pointers to buffer + 1 and the l_ pointer to
 * buffer + bufsize - 5.  A group holds no text when its s_ and e_ pointers
 * are equal.
 *
 *   lab   - label text; main() also moves a whole preprocessor line here
 *   code  - the code of the line being assembled
 *   com   - comment text
 *   token - main() copies (e_token - s_token) bytes from here when it
 *           appends the current token to the code buffer
 */
char       *labbuf, *s_lab, *e_lab, *l_lab;
char       *codebuf, *s_code, *e_code, *l_code;
char       *combuf, *s_com, *e_com, *l_com;
char       *tokenbuf, *s_token, *e_token, *l_token;

/*
 * Input buffer.  main() allocates in_buffer and places in_buffer_limit
 * 8 bytes into it.  buf_ptr is the next character to be consumed and
 * buf_end marks where the available characters stop; main() calls
 * fill_buffer() whenever buf_ptr reaches buf_end.
 */
char       *in_buffer, *in_buffer_limit;
char       *buf_ptr, *buf_end;

/*
 * Holding area for text that main() sets aside: the newlines and comments
 * that follow an if (), while (), else, etc., and a comment found at the
 * end of a preprocessor line.
 */
char        sc_buf[sc_size];

/*
 * save_com is where the set-aside text starts inside sc_buf and sc_end is
 * where the next character will be stored.  sc_end is NULL while nothing is
 * being set aside.
 */
char       *save_com, *sc_end;

/*
 * Copies of buf_ptr and buf_end taken when main() redirects buf_ptr/buf_end
 * to the text in save_com, so that lexi() reads the set-aside text next.
 * Both start out NULL.  NOTE(review): the code that switches back to the
 * real input is not in this file's visible part - presumably fill_buffer().
 */
char       *bp_save;
char       *be_save;

/* Formatting options consulted throughout main() as opt.<name>. */
struct options opt;
/* Input line number; starts at 1 and is bumped for every newline token. */
int         line_no;

/* The parser state handed to lexi() and updated as tokens are processed. */
struct parser_state ps;
/* Number of state_stack entries in use; incremented for each "#if" line. */
int         ifdef_level;
/*
 * state_stack[n] receives a copy of ps when a "#if" line is seen at nesting
 * level n; match_state[n].tos is set to -1 at the same moment.  When all
 * entries are taken main() reports "#if stack overflow" instead.
 */
struct parser_state state_stack[5];
struct parser_state match_state[5];


/* Helpers defined later in this file. */
static void bakcopy(void);
static void indent_declaration(int, int);

const char *in_name = "Standard Input";	/* will always point to name of input
					 * file */
const char *out_name = "Standard Output";	/* will always point to name
						 * of output file */
const char *simple_backup_suffix = ".BAK";	/* Suffix to use for backup
						 * files; main() replaces it
						 * with the value of the
						 * SIMPLE_BACKUP_SUFFIX
						 * environment variable when
						 * that is set */
char bakfile[MAXPATHLEN] = ""; 96 97 int 98 main(int argc, char **argv) 99 { 100 cap_rights_t rights; 101 102 int dec_ind; /* current indentation for declarations */ 103 int di_stack[20]; /* a stack of structure indentation levels */ 104 int force_nl; /* when true, code must be broken */ 105 int hd_type = 0; /* used to store type of stmt for if (...), 106 * for (...), etc */ 107 int i; /* local loop counter */ 108 int scase; /* set to true when we see a case, so we will 109 * know what to do with the following colon */ 110 int sp_sw; /* when true, we are in the expression of 111 * if(...), while(...), etc. */ 112 int squest; /* when this is positive, we have seen a ? 113 * without the matching : in a <c>?<s>:<s> 114 * construct */ 115 const char *t_ptr; /* used for copying tokens */ 116 int tabs_to_var; /* true if using tabs to indent to var name */ 117 int type_code; /* the type of token, returned by lexi */ 118 119 int last_else = 0; /* true iff last keyword was an else */ 120 const char *profile_name = NULL; 121 const char *envval = NULL; 122 struct parser_state transient_state; /* a copy for lookup */ 123 124 /*-----------------------------------------------*\ 125 | INITIALIZATION | 126 \*-----------------------------------------------*/ 127 128 found_err = 0; 129 130 ps.p_stack[0] = stmt; /* this is the parser's stack */ 131 ps.last_nl = true; /* this is true if the last thing scanned was 132 * a newline */ 133 ps.last_token = semicolon; 134 combuf = (char *) malloc(bufsize); 135 if (combuf == NULL) 136 err(1, NULL); 137 labbuf = (char *) malloc(bufsize); 138 if (labbuf == NULL) 139 err(1, NULL); 140 codebuf = (char *) malloc(bufsize); 141 if (codebuf == NULL) 142 err(1, NULL); 143 tokenbuf = (char *) malloc(bufsize); 144 if (tokenbuf == NULL) 145 err(1, NULL); 146 alloc_typenames(); 147 init_constant_tt(); 148 l_com = combuf + bufsize - 5; 149 l_lab = labbuf + bufsize - 5; 150 l_code = codebuf + bufsize - 5; 151 l_token = tokenbuf + bufsize - 5; 152 combuf[0] 
= codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 153 * comment buffers */ 154 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 155 opt.else_if = 1; /* Default else-if special processing to on */ 156 s_lab = e_lab = labbuf + 1; 157 s_code = e_code = codebuf + 1; 158 s_com = e_com = combuf + 1; 159 s_token = e_token = tokenbuf + 1; 160 161 in_buffer = (char *) malloc(10); 162 if (in_buffer == NULL) 163 err(1, NULL); 164 in_buffer_limit = in_buffer + 8; 165 buf_ptr = buf_end = in_buffer; 166 line_no = 1; 167 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 168 sp_sw = force_nl = false; 169 ps.in_or_st = false; 170 ps.bl_line = true; 171 dec_ind = 0; 172 di_stack[ps.dec_nest = 0] = 0; 173 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 174 175 scase = ps.pcase = false; 176 squest = 0; 177 sc_end = NULL; 178 bp_save = NULL; 179 be_save = NULL; 180 181 output = NULL; 182 tabs_to_var = 0; 183 184 envval = getenv("SIMPLE_BACKUP_SUFFIX"); 185 if (envval) 186 simple_backup_suffix = envval; 187 188 /*--------------------------------------------------*\ 189 | COMMAND LINE SCAN | 190 \*--------------------------------------------------*/ 191 192 #ifdef undef 193 max_col = 78; /* -l78 */ 194 lineup_to_parens = 1; /* -lp */ 195 lineup_to_parens_always = 0; /* -nlpl */ 196 ps.ljust_decl = 0; /* -ndj */ 197 ps.com_ind = 33; /* -c33 */ 198 star_comment_cont = 1; /* -sc */ 199 ps.ind_size = 8; /* -i8 */ 200 verbose = 0; 201 ps.decl_indent = 16; /* -di16 */ 202 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value 203 * by an arg, we will set this equal to 204 * ps.decl_ind */ 205 ps.indent_parameters = 1; /* -ip */ 206 ps.decl_com_ind = 0; /* if this is not set to some positive value 207 * by an arg, we will set this equal to 208 * ps.com_ind */ 209 btype_2 = 1; /* -br */ 210 cuddle_else = 1; /* -ce */ 211 ps.unindent_displace = 0; /* -d0 */ 212 ps.case_indent = 0; /* -cli0 */ 213 format_block_comments = 1; /* -fcb */ 214 format_col1_comments = 
1; /* -fc1 */ 215 procnames_start_line = 1; /* -psl */ 216 proc_calls_space = 0; /* -npcs */ 217 comment_delimiter_on_blankline = 1; /* -cdb */ 218 ps.leave_comma = 1; /* -nbc */ 219 #endif 220 221 for (i = 1; i < argc; ++i) 222 if (strcmp(argv[i], "-npro") == 0) 223 break; 224 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') 225 profile_name = argv[i]; /* non-empty -P (set profile) */ 226 set_defaults(); 227 if (i >= argc) 228 set_profile(profile_name); 229 230 for (i = 1; i < argc; ++i) { 231 232 /* 233 * look thru args (if any) for changes to defaults 234 */ 235 if (argv[i][0] != '-') {/* no flag on parameter */ 236 if (input == NULL) { /* we must have the input file */ 237 in_name = argv[i]; /* remember name of input file */ 238 input = fopen(in_name, "r"); 239 if (input == NULL) /* check for open error */ 240 err(1, "%s", in_name); 241 continue; 242 } 243 else if (output == NULL) { /* we have the output file */ 244 out_name = argv[i]; /* remember name of output file */ 245 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 246 * the file */ 247 errx(1, "input and output files must be different"); 248 } 249 output = fopen(out_name, "w"); 250 if (output == NULL) /* check for create error */ 251 err(1, "%s", out_name); 252 continue; 253 } 254 errx(1, "unknown parameter: %s", argv[i]); 255 } 256 else 257 set_option(argv[i]); 258 } /* end of for */ 259 if (input == NULL) 260 input = stdin; 261 if (output == NULL) { 262 if (input == stdin) 263 output = stdout; 264 else { 265 out_name = in_name; 266 bakcopy(); 267 } 268 } 269 270 /* Restrict input/output descriptors and enter Capsicum sandbox. 
*/ 271 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); 272 if (caph_rights_limit(fileno(output), &rights) < 0) 273 err(EXIT_FAILURE, "unable to limit rights for %s", out_name); 274 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 275 if (caph_rights_limit(fileno(input), &rights) < 0) 276 err(EXIT_FAILURE, "unable to limit rights for %s", in_name); 277 if (caph_enter() < 0) 278 err(EXIT_FAILURE, "unable to enter capability mode"); 279 280 if (opt.com_ind <= 1) 281 opt.com_ind = 2; /* don't put normal comments before column 2 */ 282 if (opt.block_comment_max_col <= 0) 283 opt.block_comment_max_col = opt.max_col; 284 if (opt.local_decl_indent < 0) /* if not specified by user, set this */ 285 opt.local_decl_indent = opt.decl_indent; 286 if (opt.decl_com_ind <= 0) /* if not specified by user, set this */ 287 opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind; 288 if (opt.continuation_indent == 0) 289 opt.continuation_indent = opt.ind_size; 290 fill_buffer(); /* get first batch of stuff into input buffer */ 291 292 parse(semicolon); 293 { 294 char *p = buf_ptr; 295 int col = 1; 296 297 while (1) { 298 if (*p == ' ') 299 col++; 300 else if (*p == '\t') 301 col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1; 302 else 303 break; 304 p++; 305 } 306 if (col > opt.ind_size) 307 ps.ind_level = ps.i_l_follow = col / opt.ind_size; 308 } 309 310 /* 311 * START OF MAIN LOOP 312 */ 313 314 while (1) { /* this is the main loop. it will go until we 315 * reach eof */ 316 int comment_buffered = false; 317 318 type_code = lexi(&ps); /* lexi reads one token. The actual 319 * characters read are stored in "token". lexi 320 * returns a code indicating the type of token */ 321 322 /* 323 * The following code moves newlines and comments following an if (), 324 * while (), else, etc. up to the start of the following stmt to 325 * a buffer. This allows proper handling of both kinds of brace 326 * placement (-br, -bl) and cuddling "else" (-ce). 
327 */ 328 329 while (ps.search_brace) { 330 switch (type_code) { 331 case newline: 332 if (sc_end == NULL) { 333 save_com = sc_buf; 334 save_com[0] = save_com[1] = ' '; 335 sc_end = &save_com[2]; 336 } 337 *sc_end++ = '\n'; 338 /* 339 * We may have inherited a force_nl == true from the previous 340 * token (like a semicolon). But once we know that a newline 341 * has been scanned in this loop, force_nl should be false. 342 * 343 * However, the force_nl == true must be preserved if newline 344 * is never scanned in this loop, so this assignment cannot be 345 * done earlier. 346 */ 347 force_nl = false; 348 case form_feed: 349 break; 350 case comment: 351 if (sc_end == NULL) { 352 /* 353 * Copy everything from the start of the line, because 354 * pr_comment() will use that to calculate original 355 * indentation of a boxed comment. 356 */ 357 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4); 358 save_com = sc_buf + (buf_ptr - in_buffer - 4); 359 save_com[0] = save_com[1] = ' '; 360 sc_end = &save_com[2]; 361 } 362 comment_buffered = true; 363 *sc_end++ = '/'; /* copy in start of comment */ 364 *sc_end++ = '*'; 365 for (;;) { /* loop until we get to the end of the comment */ 366 *sc_end = *buf_ptr++; 367 if (buf_ptr >= buf_end) 368 fill_buffer(); 369 if (*sc_end++ == '*' && *buf_ptr == '/') 370 break; /* we are at end of comment */ 371 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer 372 * overflow */ 373 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); 374 fflush(output); 375 exit(1); 376 } 377 } 378 *sc_end++ = '/'; /* add ending slash */ 379 if (++buf_ptr >= buf_end) /* get past / in buffer */ 380 fill_buffer(); 381 break; 382 case lbrace: 383 /* 384 * Put KNF-style lbraces before the buffered up tokens and 385 * jump out of this loop in order to avoid copying the token 386 * again under the default case of the switch below. 
387 */ 388 if (sc_end != NULL && opt.btype_2) { 389 save_com[0] = '{'; 390 /* 391 * Originally the lbrace may have been alone on its own 392 * line, but it will be moved into "the else's line", so 393 * if there was a newline resulting from the "{" before, 394 * it must be scanned now and ignored. 395 */ 396 while (isspace((unsigned char)*buf_ptr)) { 397 if (++buf_ptr >= buf_end) 398 fill_buffer(); 399 if (*buf_ptr == '\n') 400 break; 401 } 402 goto sw_buffer; 403 } 404 /* FALLTHROUGH */ 405 default: /* it is the start of a normal statement */ 406 { 407 int remove_newlines; 408 409 remove_newlines = 410 /* "} else" */ 411 (type_code == sp_nparen && *token == 'e' && 412 e_code != s_code && e_code[-1] == '}') 413 /* "else if" */ 414 || (type_code == sp_paren && *token == 'i' && 415 last_else && opt.else_if); 416 if (remove_newlines) 417 force_nl = false; 418 if (sc_end == NULL) { /* ignore buffering if 419 * comment wasn't saved up */ 420 ps.search_brace = false; 421 goto check_type; 422 } 423 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) { 424 sc_end--; 425 } 426 if (opt.swallow_optional_blanklines || 427 (!comment_buffered && remove_newlines)) { 428 force_nl = !remove_newlines; 429 while (sc_end > save_com && sc_end[-1] == '\n') { 430 sc_end--; 431 } 432 } 433 if (force_nl) { /* if we should insert a nl here, put 434 * it into the buffer */ 435 force_nl = false; 436 --line_no; /* this will be re-increased when the 437 * newline is read from the buffer */ 438 *sc_end++ = '\n'; 439 *sc_end++ = ' '; 440 if (opt.verbose) /* print error msg if the line was 441 * not already broken */ 442 diag2(0, "Line broken"); 443 } 444 for (t_ptr = token; *t_ptr; ++t_ptr) 445 *sc_end++ = *t_ptr; 446 447 sw_buffer: 448 ps.search_brace = false; /* stop looking for start of 449 * stmt */ 450 bp_save = buf_ptr; /* save current input buffer */ 451 be_save = buf_end; 452 buf_ptr = save_com; /* fix so that subsequent calls to 453 * lexi will take tokens out of 454 * 
save_com */ 455 *sc_end++ = ' ';/* add trailing blank, just in case */ 456 buf_end = sc_end; 457 sc_end = NULL; 458 break; 459 } 460 } /* end of switch */ 461 /* 462 * We must make this check, just in case there was an unexpected 463 * EOF. 464 */ 465 if (type_code != 0) { 466 /* 467 * The only intended purpose of calling lexi() below is to 468 * categorize the next token in order to decide whether to 469 * continue buffering forthcoming tokens. Once the buffering 470 * is over, lexi() will be called again elsewhere on all of 471 * the tokens - this time for normal processing. 472 * 473 * Calling it for this purpose is a bug, because lexi() also 474 * changes the parser state and discards leading whitespace, 475 * which is needed mostly for comment-related considerations. 476 * 477 * Work around the former problem by giving lexi() a copy of 478 * the current parser state and discard it if the call turned 479 * out to be just a look ahead. 480 * 481 * Work around the latter problem by copying all whitespace 482 * characters into the buffer so that the later lexi() call 483 * will read them. 
484 */ 485 if (sc_end != NULL) { 486 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 487 *sc_end++ = *buf_ptr++; 488 if (sc_end >= &save_com[sc_size]) { 489 errx(1, "input too long"); 490 } 491 } 492 if (buf_ptr >= buf_end) { 493 fill_buffer(); 494 } 495 } 496 transient_state = ps; 497 type_code = lexi(&transient_state); /* read another token */ 498 if (type_code != newline && type_code != form_feed && 499 type_code != comment && !transient_state.search_brace) { 500 ps = transient_state; 501 } 502 } 503 } /* end of while (search_brace) */ 504 last_else = 0; 505 check_type: 506 if (type_code == 0) { /* we got eof */ 507 if (s_lab != e_lab || s_code != e_code 508 || s_com != e_com) /* must dump end of line */ 509 dump_line(); 510 if (ps.tos > 1) /* check for balanced braces */ 511 diag2(1, "Stuff missing from end of file"); 512 513 if (opt.verbose) { 514 printf("There were %d output lines and %d comments\n", 515 ps.out_lines, ps.out_coms); 516 printf("(Lines with comments)/(Lines with code): %6.3f\n", 517 (1.0 * ps.com_lines) / code_lines); 518 } 519 fflush(output); 520 exit(found_err); 521 } 522 if ( 523 (type_code != comment) && 524 (type_code != newline) && 525 (type_code != preesc) && 526 (type_code != form_feed)) { 527 if (force_nl && 528 (type_code != semicolon) && 529 (type_code != lbrace || !opt.btype_2)) { 530 /* we should force a broken line here */ 531 if (opt.verbose) 532 diag2(0, "Line broken"); 533 dump_line(); 534 ps.want_blank = false; /* dont insert blank at line start */ 535 force_nl = false; 536 } 537 ps.in_stmt = true; /* turn on flag which causes an extra level of 538 * indentation. this is turned off by a ; or 539 * '}' */ 540 if (s_com != e_com) { /* the turkey has embedded a comment 541 * in a line. 
fix it */ 542 int len = e_com - s_com; 543 544 CHECK_SIZE_CODE(len + 3); 545 *e_code++ = ' '; 546 memcpy(e_code, s_com, len); 547 e_code += len; 548 *e_code++ = ' '; 549 *e_code = '\0'; /* null terminate code sect */ 550 ps.want_blank = false; 551 e_com = s_com; 552 } 553 } 554 else if (type_code != comment) /* preserve force_nl thru a comment */ 555 force_nl = false; /* cancel forced newline after newline, form 556 * feed, etc */ 557 558 559 560 /*-----------------------------------------------------*\ 561 | do switch on type of token scanned | 562 \*-----------------------------------------------------*/ 563 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code 564 * before the next CHECK_SIZE_CODE or 565 * dump_line() is 2. After that there's the 566 * final increment for the null character. */ 567 switch (type_code) { /* now, decide what to do with the token */ 568 569 case form_feed: /* found a form feed in line */ 570 ps.use_ff = true; /* a form feed is treated much like a newline */ 571 dump_line(); 572 ps.want_blank = false; 573 break; 574 575 case newline: 576 if (ps.last_token != comma || ps.p_l_follow > 0 577 || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 578 dump_line(); 579 ps.want_blank = false; 580 } 581 ++line_no; /* keep track of input line number */ 582 break; 583 584 case lparen: /* got a '(' or '[' */ 585 /* count parens to make Healy happy */ 586 if (++ps.p_l_follow == nitems(ps.paren_indents)) { 587 diag3(0, "Reached internal limit of %d unclosed parens", 588 nitems(ps.paren_indents)); 589 ps.p_l_follow--; 590 } 591 if (*token == '[') 592 /* not a function pointer declaration or a function call */; 593 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && 594 ps.procname[0] == '\0' && ps.paren_level == 0) { 595 /* function pointer declarations */ 596 indent_declaration(dec_ind, tabs_to_var); 597 ps.dumped_decl_indent = true; 598 } 599 else if (ps.want_blank && 600 ((ps.last_token != ident && 
ps.last_token != funcname) || 601 opt.proc_calls_space || 602 /* offsetof (1) is never allowed a space; sizeof (2) gets 603 * one iff -bs; all other keywords (>2) always get a space 604 * before lparen */ 605 ps.keyword + opt.Bill_Shannon > 2)) 606 *e_code++ = ' '; 607 ps.want_blank = false; 608 *e_code++ = token[0]; 609 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1; 610 if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent 611 && ps.paren_indents[0] < 2 * opt.ind_size) 612 ps.paren_indents[0] = 2 * opt.ind_size; 613 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 614 /* 615 * this is a kluge to make sure that declarations will be 616 * aligned right if proc decl has an explicit type on it, i.e. 617 * "int a(x) {..." 618 */ 619 parse(semicolon); /* I said this was a kluge... */ 620 ps.in_or_st = false; /* turn off flag for structure decl or 621 * initialization */ 622 } 623 /* parenthesized type following sizeof or offsetof is not a cast */ 624 if (ps.keyword == 1 || ps.keyword == 2) 625 ps.not_cast_mask |= 1 << ps.p_l_follow; 626 break; 627 628 case rparen: /* got a ')' or ']' */ 629 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { 630 ps.last_u_d = true; 631 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 632 ps.want_blank = opt.space_after_cast; 633 } else 634 ps.want_blank = true; 635 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; 636 if (--ps.p_l_follow < 0) { 637 ps.p_l_follow = 0; 638 diag3(0, "Extra %c", *token); 639 } 640 if (e_code == s_code) /* if the paren starts the line */ 641 ps.paren_level = ps.p_l_follow; /* then indent it */ 642 643 *e_code++ = token[0]; 644 645 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 646 * (...), or some such */ 647 sp_sw = false; 648 force_nl = true;/* must force newline after if */ 649 ps.last_u_d = true; /* inform lexi that a following 650 * operator is unary */ 651 ps.in_stmt = false; /* dont use stmt continuation 652 * indentation */ 653 654 
parse(hd_type); /* let parser worry about if, or whatever */ 655 } 656 ps.search_brace = opt.btype_2; /* this should ensure that 657 * constructs such as main(){...} 658 * and int[]{...} have their braces 659 * put in the right place */ 660 break; 661 662 case unary_op: /* this could be any unary operation */ 663 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init && 664 ps.procname[0] == '\0' && ps.paren_level == 0) { 665 /* pointer declarations */ 666 667 /* 668 * if this is a unary op in a declaration, we should indent 669 * this token 670 */ 671 for (i = 0; token[i]; ++i) 672 /* find length of token */; 673 indent_declaration(dec_ind - i, tabs_to_var); 674 ps.dumped_decl_indent = true; 675 } 676 else if (ps.want_blank) 677 *e_code++ = ' '; 678 679 { 680 int len = e_token - s_token; 681 682 CHECK_SIZE_CODE(len); 683 memcpy(e_code, token, len); 684 e_code += len; 685 } 686 ps.want_blank = false; 687 break; 688 689 case binary_op: /* any binary operation */ 690 { 691 int len = e_token - s_token; 692 693 CHECK_SIZE_CODE(len + 1); 694 if (ps.want_blank) 695 *e_code++ = ' '; 696 memcpy(e_code, token, len); 697 e_code += len; 698 } 699 ps.want_blank = true; 700 break; 701 702 case postop: /* got a trailing ++ or -- */ 703 *e_code++ = token[0]; 704 *e_code++ = token[1]; 705 ps.want_blank = true; 706 break; 707 708 case question: /* got a ? 
*/ 709 squest++; /* this will be used when a later colon 710 * appears so we can distinguish the 711 * <c>?<n>:<n> construct */ 712 if (ps.want_blank) 713 *e_code++ = ' '; 714 *e_code++ = '?'; 715 ps.want_blank = true; 716 break; 717 718 case casestmt: /* got word 'case' or 'default' */ 719 scase = true; /* so we can process the later colon properly */ 720 goto copy_id; 721 722 case colon: /* got a ':' */ 723 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 724 --squest; 725 if (ps.want_blank) 726 *e_code++ = ' '; 727 *e_code++ = ':'; 728 ps.want_blank = true; 729 break; 730 } 731 if (ps.in_or_st) { 732 *e_code++ = ':'; 733 ps.want_blank = false; 734 break; 735 } 736 ps.in_stmt = false; /* seeing a label does not imply we are in a 737 * stmt */ 738 /* 739 * turn everything so far into a label 740 */ 741 { 742 int len = e_code - s_code; 743 744 CHECK_SIZE_LAB(len + 3); 745 memcpy(e_lab, s_code, len); 746 e_lab += len; 747 *e_lab++ = ':'; 748 *e_lab = '\0'; 749 e_code = s_code; 750 } 751 force_nl = ps.pcase = scase; /* ps.pcase will be used by 752 * dump_line to decide how to 753 * indent the label. 
force_nl 754 * will force a case n: to be 755 * on a line by itself */ 756 scase = false; 757 ps.want_blank = false; 758 break; 759 760 case semicolon: /* got a ';' */ 761 if (ps.dec_nest == 0) 762 ps.in_or_st = false;/* we are not in an initialization or 763 * structure declaration */ 764 scase = false; /* these will only need resetting in an error */ 765 squest = 0; 766 if (ps.last_token == rparen) 767 ps.in_parameter_declaration = 0; 768 ps.cast_mask = 0; 769 ps.not_cast_mask = 0; 770 ps.block_init = 0; 771 ps.block_init_level = 0; 772 ps.just_saw_decl--; 773 774 if (ps.in_decl && s_code == e_code && !ps.block_init && 775 !ps.dumped_decl_indent && ps.paren_level == 0) { 776 /* indent stray semicolons in declarations */ 777 indent_declaration(dec_ind - 1, tabs_to_var); 778 ps.dumped_decl_indent = true; 779 } 780 781 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 782 * structure declaration, we 783 * arent any more */ 784 785 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 786 787 /* 788 * This should be true iff there were unbalanced parens in the 789 * stmt. It is a bit complicated, because the semicolon might 790 * be in a for stmt 791 */ 792 diag2(1, "Unbalanced parens"); 793 ps.p_l_follow = 0; 794 if (sp_sw) { /* this is a check for an if, while, etc. 
with 795 * unbalanced parens */ 796 sp_sw = false; 797 parse(hd_type); /* dont lose the if, or whatever */ 798 } 799 } 800 *e_code++ = ';'; 801 ps.want_blank = true; 802 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 803 * middle of a stmt */ 804 805 if (!sp_sw) { /* if not if for (;;) */ 806 parse(semicolon); /* let parser know about end of stmt */ 807 force_nl = true;/* force newline after an end of stmt */ 808 } 809 break; 810 811 case lbrace: /* got a '{' */ 812 ps.in_stmt = false; /* dont indent the {} */ 813 if (!ps.block_init) 814 force_nl = true;/* force other stuff on same line as '{' onto 815 * new line */ 816 else if (ps.block_init_level <= 0) 817 ps.block_init_level = 1; 818 else 819 ps.block_init_level++; 820 821 if (s_code != e_code && !ps.block_init) { 822 if (!opt.btype_2) { 823 dump_line(); 824 ps.want_blank = false; 825 } 826 else if (ps.in_parameter_declaration && !ps.in_or_st) { 827 ps.i_l_follow = 0; 828 if (opt.function_brace_split) { /* dump the line prior 829 * to the brace ... */ 830 dump_line(); 831 ps.want_blank = false; 832 } else /* add a space between the decl and brace */ 833 ps.want_blank = true; 834 } 835 } 836 if (ps.in_parameter_declaration) 837 prefix_blankline_requested = 0; 838 839 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 840 * parens */ 841 diag2(1, "Unbalanced parens"); 842 ps.p_l_follow = 0; 843 if (sp_sw) { /* check for unclosed if, for, etc. */ 844 sp_sw = false; 845 parse(hd_type); 846 ps.ind_level = ps.i_l_follow; 847 } 848 } 849 if (s_code == e_code) 850 ps.ind_stmt = false; /* dont put extra indentation on line 851 * with '{' */ 852 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 853 * declaration or an init */ 854 di_stack[ps.dec_nest] = dec_ind; 855 if (++ps.dec_nest == nitems(di_stack)) { 856 diag3(0, "Reached internal limit of %d struct levels", 857 nitems(di_stack)); 858 ps.dec_nest--; 859 } 860 /* ? 
dec_ind = 0; */ 861 } 862 else { 863 ps.decl_on_line = false; /* we can't be in the middle of 864 * a declaration, so don't do 865 * special indentation of 866 * comments */ 867 if (opt.blanklines_after_declarations_at_proctop 868 && ps.in_parameter_declaration) 869 postfix_blankline_requested = 1; 870 ps.in_parameter_declaration = 0; 871 ps.in_decl = false; 872 } 873 dec_ind = 0; 874 parse(lbrace); /* let parser know about this */ 875 if (ps.want_blank) /* put a blank before '{' if '{' is not at 876 * start of line */ 877 *e_code++ = ' '; 878 ps.want_blank = false; 879 *e_code++ = '{'; 880 ps.just_saw_decl = 0; 881 break; 882 883 case rbrace: /* got a '}' */ 884 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 885 * omitted in 886 * declarations */ 887 parse(semicolon); 888 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 889 diag2(1, "Unbalanced parens"); 890 ps.p_l_follow = 0; 891 sp_sw = false; 892 } 893 ps.just_saw_decl = 0; 894 ps.block_init_level--; 895 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 896 * line */ 897 if (opt.verbose) 898 diag2(0, "Line broken"); 899 dump_line(); 900 } 901 *e_code++ = '}'; 902 ps.want_blank = true; 903 ps.in_stmt = ps.ind_stmt = false; 904 if (ps.dec_nest > 0) { /* we are in multi-level structure 905 * declaration */ 906 dec_ind = di_stack[--ps.dec_nest]; 907 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 908 ps.just_saw_decl = 2; 909 ps.in_decl = true; 910 } 911 prefix_blankline_requested = 0; 912 parse(rbrace); /* let parser know about this */ 913 ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead 914 && ps.il[ps.tos] >= ps.ind_level; 915 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0) 916 postfix_blankline_requested = 1; 917 break; 918 919 case swstmt: /* got keyword "switch" */ 920 sp_sw = true; 921 hd_type = swstmt; /* keep this for when we have seen the 922 * expression */ 923 goto copy_id; /* go move the token into buffer */ 
924 925 case sp_paren: /* token is if, while, for */ 926 sp_sw = true; /* the interesting stuff is done after the 927 * expression is scanned */ 928 hd_type = (*token == 'i' ? ifstmt : 929 (*token == 'w' ? whilestmt : forstmt)); 930 931 /* 932 * remember the type of header for later use by parser 933 */ 934 goto copy_id; /* copy the token into line */ 935 936 case sp_nparen: /* got else, do */ 937 ps.in_stmt = false; 938 if (*token == 'e') { 939 if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) { 940 if (opt.verbose) 941 diag2(0, "Line broken"); 942 dump_line();/* make sure this starts a line */ 943 ps.want_blank = false; 944 } 945 force_nl = true;/* also, following stuff must go onto new line */ 946 last_else = 1; 947 parse(elselit); 948 } 949 else { 950 if (e_code != s_code) { /* make sure this starts a line */ 951 if (opt.verbose) 952 diag2(0, "Line broken"); 953 dump_line(); 954 ps.want_blank = false; 955 } 956 force_nl = true;/* also, following stuff must go onto new line */ 957 last_else = 0; 958 parse(dolit); 959 } 960 goto copy_id; /* move the token into line */ 961 962 case type_def: 963 case storage: 964 prefix_blankline_requested = 0; 965 goto copy_id; 966 967 case structure: 968 if (ps.p_l_follow > 0) 969 goto copy_id; 970 case decl: /* we have a declaration type (int, etc.) 
*/ 971 parse(decl); /* let parser worry about indentation */ 972 if (ps.last_token == rparen && ps.tos <= 1) { 973 if (s_code != e_code) { 974 dump_line(); 975 ps.want_blank = 0; 976 } 977 } 978 if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) { 979 ps.ind_level = ps.i_l_follow = 1; 980 ps.ind_stmt = 0; 981 } 982 ps.in_or_st = true; /* this might be a structure or initialization 983 * declaration */ 984 ps.in_decl = ps.decl_on_line = ps.last_token != type_def; 985 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 986 ps.just_saw_decl = 2; 987 prefix_blankline_requested = 0; 988 for (i = 0; token[i++];); /* get length of token */ 989 990 if (ps.ind_level == 0 || ps.dec_nest > 0) { 991 /* global variable or struct member in local variable */ 992 dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i; 993 tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0); 994 } else { 995 /* local variable */ 996 dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i; 997 tabs_to_var = (opt.use_tabs ? 
opt.local_decl_indent > 0 : 0); 998 } 999 goto copy_id; 1000 1001 case funcname: 1002 case ident: /* got an identifier or constant */ 1003 if (ps.in_decl) { 1004 if (type_code == funcname) { 1005 ps.in_decl = false; 1006 if (opt.procnames_start_line && s_code != e_code) { 1007 *e_code = '\0'; 1008 dump_line(); 1009 } 1010 else if (ps.want_blank) { 1011 *e_code++ = ' '; 1012 } 1013 ps.want_blank = false; 1014 } 1015 else if (!ps.block_init && !ps.dumped_decl_indent && 1016 ps.paren_level == 0) { /* if we are in a declaration, we 1017 * must indent identifier */ 1018 indent_declaration(dec_ind, tabs_to_var); 1019 ps.dumped_decl_indent = true; 1020 ps.want_blank = false; 1021 } 1022 } 1023 else if (sp_sw && ps.p_l_follow == 0) { 1024 sp_sw = false; 1025 force_nl = true; 1026 ps.last_u_d = true; 1027 ps.in_stmt = false; 1028 parse(hd_type); 1029 } 1030 copy_id: 1031 { 1032 int len = e_token - s_token; 1033 1034 CHECK_SIZE_CODE(len + 1); 1035 if (ps.want_blank) 1036 *e_code++ = ' '; 1037 memcpy(e_code, s_token, len); 1038 e_code += len; 1039 } 1040 if (type_code != funcname) 1041 ps.want_blank = true; 1042 break; 1043 1044 case strpfx: 1045 { 1046 int len = e_token - s_token; 1047 1048 CHECK_SIZE_CODE(len + 1); 1049 if (ps.want_blank) 1050 *e_code++ = ' '; 1051 memcpy(e_code, token, len); 1052 e_code += len; 1053 } 1054 ps.want_blank = false; 1055 break; 1056 1057 case period: /* treat a period kind of like a binary 1058 * operation */ 1059 *e_code++ = '.'; /* move the period into line */ 1060 ps.want_blank = false; /* dont put a blank after a period */ 1061 break; 1062 1063 case comma: 1064 ps.want_blank = (s_code != e_code); /* only put blank after comma 1065 * if comma does not start the 1066 * line */ 1067 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init && 1068 !ps.dumped_decl_indent && ps.paren_level == 0) { 1069 /* indent leading commas and not the actual identifiers */ 1070 indent_declaration(dec_ind - 1, tabs_to_var); 1071 ps.dumped_decl_indent = 
true; 1072 } 1073 *e_code++ = ','; 1074 if (ps.p_l_follow == 0) { 1075 if (ps.block_init_level <= 0) 1076 ps.block_init = 0; 1077 if (break_comma && (!opt.leave_comma || 1078 count_spaces_until(compute_code_target(), s_code, e_code) > 1079 opt.max_col - opt.tabsize)) 1080 force_nl = true; 1081 } 1082 break; 1083 1084 case preesc: /* got the character '#' */ 1085 if ((s_com != e_com) || 1086 (s_lab != e_lab) || 1087 (s_code != e_code)) 1088 dump_line(); 1089 CHECK_SIZE_LAB(1); 1090 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1091 { 1092 int in_comment = 0; 1093 int com_start = 0; 1094 char quote = 0; 1095 int com_end = 0; 1096 1097 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1098 buf_ptr++; 1099 if (buf_ptr >= buf_end) 1100 fill_buffer(); 1101 } 1102 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1103 CHECK_SIZE_LAB(2); 1104 *e_lab = *buf_ptr++; 1105 if (buf_ptr >= buf_end) 1106 fill_buffer(); 1107 switch (*e_lab++) { 1108 case BACKSLASH: 1109 if (!in_comment) { 1110 *e_lab++ = *buf_ptr++; 1111 if (buf_ptr >= buf_end) 1112 fill_buffer(); 1113 } 1114 break; 1115 case '/': 1116 if (*buf_ptr == '*' && !in_comment && !quote) { 1117 in_comment = 1; 1118 *e_lab++ = *buf_ptr++; 1119 com_start = e_lab - s_lab - 2; 1120 } 1121 break; 1122 case '"': 1123 if (quote == '"') 1124 quote = 0; 1125 break; 1126 case '\'': 1127 if (quote == '\'') 1128 quote = 0; 1129 break; 1130 case '*': 1131 if (*buf_ptr == '/' && in_comment) { 1132 in_comment = 0; 1133 *e_lab++ = *buf_ptr++; 1134 com_end = e_lab - s_lab; 1135 } 1136 break; 1137 } 1138 } 1139 1140 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1141 e_lab--; 1142 if (e_lab - s_lab == com_end && bp_save == NULL) { 1143 /* comment on preprocessor line */ 1144 if (sc_end == NULL) { /* if this is the first comment, 1145 * we must set up the buffer */ 1146 save_com = sc_buf; 1147 sc_end = &save_com[0]; 1148 } 1149 else { 1150 *sc_end++ = '\n'; /* add newline between 1151 * comments */ 1152 
*sc_end++ = ' '; 1153 --line_no; 1154 } 1155 if (sc_end - save_com + com_end - com_start > sc_size) 1156 errx(1, "input too long"); 1157 memmove(sc_end, s_lab + com_start, com_end - com_start); 1158 sc_end += com_end - com_start; 1159 e_lab = s_lab + com_start; 1160 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1161 e_lab--; 1162 bp_save = buf_ptr; /* save current input buffer */ 1163 be_save = buf_end; 1164 buf_ptr = save_com; /* fix so that subsequent calls to 1165 * lexi will take tokens out of 1166 * save_com */ 1167 *sc_end++ = ' '; /* add trailing blank, just in case */ 1168 buf_end = sc_end; 1169 sc_end = NULL; 1170 } 1171 CHECK_SIZE_LAB(1); 1172 *e_lab = '\0'; /* null terminate line */ 1173 ps.pcase = false; 1174 } 1175 1176 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ 1177 if ((size_t)ifdef_level < nitems(state_stack)) { 1178 match_state[ifdef_level].tos = -1; 1179 state_stack[ifdef_level++] = ps; 1180 } 1181 else 1182 diag2(1, "#if stack overflow"); 1183 } 1184 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ 1185 if (ifdef_level <= 0) 1186 diag2(1, s_lab[3] == 'i' ? 
"Unmatched #elif" : "Unmatched #else"); 1187 else { 1188 match_state[ifdef_level - 1] = ps; 1189 ps = state_stack[ifdef_level - 1]; 1190 } 1191 } 1192 else if (strncmp(s_lab, "#endif", 6) == 0) { 1193 if (ifdef_level <= 0) 1194 diag2(1, "Unmatched #endif"); 1195 else 1196 ifdef_level--; 1197 } else { 1198 struct directives { 1199 int size; 1200 const char *string; 1201 } 1202 recognized[] = { 1203 {7, "include"}, 1204 {6, "define"}, 1205 {5, "undef"}, 1206 {4, "line"}, 1207 {5, "error"}, 1208 {6, "pragma"} 1209 }; 1210 int d = nitems(recognized); 1211 while (--d >= 0) 1212 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) 1213 break; 1214 if (d < 0) { 1215 diag2(1, "Unrecognized cpp directive"); 1216 break; 1217 } 1218 } 1219 if (opt.blanklines_around_conditional_compilation) { 1220 postfix_blankline_requested++; 1221 n_real_blanklines = 0; 1222 } 1223 else { 1224 postfix_blankline_requested = 0; 1225 prefix_blankline_requested = 0; 1226 } 1227 break; /* subsequent processing of the newline 1228 * character will cause the line to be printed */ 1229 1230 case comment: /* we have gotten a / followed by * this is a biggie */ 1231 pr_comment(); 1232 break; 1233 } /* end of big switch stmt */ 1234 1235 *e_code = '\0'; /* make sure code section is null terminated */ 1236 if (type_code != comment && type_code != newline && type_code != preesc) 1237 ps.last_token = type_code; 1238 } /* end of main while (1) loop */ 1239 } 1240 1241 /* 1242 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1243 * backup file will be ".Bfile" then make the backup file the input and 1244 * original input file the output 1245 */ 1246 static void 1247 bakcopy(void) 1248 { 1249 int n, 1250 bakchn; 1251 char buff[8 * 1024]; 1252 const char *p; 1253 1254 /* construct file name .Bfile */ 1255 for (p = in_name; *p; p++); /* skip to end of string */ 1256 while (p > in_name && *p != '/') /* find last '/' */ 1257 p--; 1258 if (*p == '/') 1259 p++; 1260 
sprintf(bakfile, "%s%s", p, simple_backup_suffix); 1261 1262 /* copy in_name to backup file */ 1263 bakchn = creat(bakfile, 0600); 1264 if (bakchn < 0) 1265 err(1, "%s", bakfile); 1266 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 1267 if (write(bakchn, buff, n) != n) 1268 err(1, "%s", bakfile); 1269 if (n < 0) 1270 err(1, "%s", in_name); 1271 close(bakchn); 1272 fclose(input); 1273 1274 /* re-open backup file as the input file */ 1275 input = fopen(bakfile, "r"); 1276 if (input == NULL) 1277 err(1, "%s", bakfile); 1278 /* now the original input file will be the output */ 1279 output = fopen(in_name, "w"); 1280 if (output == NULL) { 1281 unlink(bakfile); 1282 err(1, "%s", in_name); 1283 } 1284 } 1285 1286 static void 1287 indent_declaration(int cur_dec_ind, int tabs_to_var) 1288 { 1289 int pos = e_code - s_code; 1290 char *startpos = e_code; 1291 1292 /* 1293 * get the tab math right for indentations that are not multiples of tabsize 1294 */ 1295 if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) { 1296 pos += (ps.ind_level * opt.ind_size) % opt.tabsize; 1297 cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize; 1298 } 1299 if (tabs_to_var) { 1300 int tpos; 1301 1302 CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize); 1303 while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) { 1304 *e_code++ = '\t'; 1305 pos = tpos; 1306 } 1307 } 1308 CHECK_SIZE_CODE(cur_dec_ind - pos + 1); 1309 while (pos < cur_dec_ind) { 1310 *e_code++ = ' '; 1311 pos++; 1312 } 1313 if (e_code == startpos && ps.want_blank) { 1314 *e_code++ = ' '; 1315 ps.want_blank = false; 1316 } 1317 } 1318