1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 6 * Copyright (c) 1980, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if 0 39 #ifndef lint 40 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 41 #endif /* not lint */ 42 #endif 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include <sys/param.h> 48 #include <sys/capsicum.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <unistd.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <ctype.h> 57 #include "indent_globs.h" 58 #include "indent_codes.h" 59 #include "indent.h" 60 61 static void bakcopy(void); 62 static void indent_declaration(int, int); 63 64 const char *in_name = "Standard Input"; /* will always point to name of input 65 * file */ 66 const char *out_name = "Standard Output"; /* will always point to name 67 * of output file */ 68 const char *simple_backup_suffix = ".BAK"; /* Suffix to use for backup 69 * files */ 70 char bakfile[MAXPATHLEN] = ""; 71 72 int 73 main(int argc, char **argv) 74 { 75 cap_rights_t rights; 76 77 int dec_ind; /* current indentation for declarations */ 78 int di_stack[20]; /* a stack of structure indentation levels */ 79 int force_nl; /* when true, code must be broken */ 80 int hd_type = 0; /* used to store type of stmt for if (...), 81 * for (...), etc */ 82 int i; /* local loop counter */ 83 int scase; /* set to true when we see a case, so we will 84 * know what to do with the following colon */ 85 int sp_sw; /* when true, we are in the expression of 86 * if(...), while(...), etc. */ 87 int squest; /* when this is positive, we have seen a ? 88 * without the matching : in a <c>?<s>:<s> 89 * construct */ 90 const char *t_ptr; /* used for copying tokens */ 91 int tabs_to_var; /* true if using tabs to indent to var name */ 92 int type_code; /* the type of token, returned by lexi */ 93 94 int last_else = 0; /* true iff last keyword was an else */ 95 const char *profile_name = NULL; 96 const char *envval = NULL; 97 struct parser_state transient_state; /* a copy for lookup */ 98 99 /*-----------------------------------------------*\ 100 | INITIALIZATION | 101 \*-----------------------------------------------*/ 102 103 found_err = 0; 104 105 ps.p_stack[0] = stmt; /* this is the parser's stack */ 106 ps.last_nl = true; /* this is true if the last thing scanned was 107 * a newline */ 108 ps.last_token = semicolon; 109 combuf = (char *) malloc(bufsize); 110 if (combuf == NULL) 111 err(1, NULL); 112 labbuf = (char *) malloc(bufsize); 113 if (labbuf == NULL) 114 err(1, NULL); 115 codebuf = (char *) malloc(bufsize); 116 if (codebuf == NULL) 117 err(1, NULL); 118 tokenbuf = (char *) malloc(bufsize); 119 if (tokenbuf == NULL) 120 err(1, NULL); 121 alloc_typenames(); 122 l_com = combuf + bufsize - 5; 123 l_lab = labbuf + bufsize - 5; 124 l_code = codebuf + bufsize - 5; 125 l_token = tokenbuf + bufsize - 5; 126 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 127 * comment buffers */ 128 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 129 ps.else_if = 1; /* Default else-if special processing to on */ 130 s_lab = e_lab = labbuf + 1; 131 s_code = e_code = codebuf + 1; 132 s_com = e_com = combuf + 1; 133 s_token = e_token = tokenbuf + 1; 134 135 in_buffer = (char *) malloc(10); 136 if (in_buffer == NULL) 137 err(1, NULL); 138 in_buffer_limit = in_buffer + 8; 139 buf_ptr = buf_end = in_buffer; 140 line_no = 1; 141 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 142 sp_sw = force_nl = false; 143 ps.in_or_st = false; 144 ps.bl_line = true; 145 dec_ind = 0; 146 di_stack[ps.dec_nest = 0] = 0; 147 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 148 149 scase = ps.pcase = false; 150 squest = 0; 151 sc_end = NULL; 152 bp_save = NULL; 153 be_save = NULL; 154 155 output = NULL; 156 tabs_to_var = 0; 157 158 envval = getenv("SIMPLE_BACKUP_SUFFIX"); 159 if (envval) 160 simple_backup_suffix = envval; 161 162 /*--------------------------------------------------*\ 163 | COMMAND LINE SCAN | 164 \*--------------------------------------------------*/ 165 166 #ifdef undef 167 max_col = 78; /* -l78 */ 168 lineup_to_parens = 1; /* -lp */ 169 lineup_to_parens_always = 0; /* -nlpl */ 170 ps.ljust_decl = 0; /* -ndj */ 171 ps.com_ind = 33; /* -c33 */ 172 star_comment_cont = 1; /* -sc */ 173 ps.ind_size = 8; /* -i8 */ 174 verbose = 0; 175 ps.decl_indent = 16; /* -di16 */ 176 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value 177 * by an arg, we will set this equal to 178 * ps.decl_ind */ 179 ps.indent_parameters = 1; /* -ip */ 180 ps.decl_com_ind = 0; /* if this is not set to some positive value 181 * by an arg, we will set this equal to 182 * ps.com_ind */ 183 btype_2 = 1; /* -br */ 184 cuddle_else = 1; /* -ce */ 185 ps.unindent_displace = 0; /* -d0 */ 186 ps.case_indent = 0; /* -cli0 */ 187 format_block_comments = 1; /* -fcb */ 188 format_col1_comments = 1; /* -fc1 */ 189 procnames_start_line = 1; /* -psl */ 190 proc_calls_space = 0; /* -npcs */ 191 comment_delimiter_on_blankline = 1; /* -cdb */ 192 ps.leave_comma = 1; /* -nbc */ 193 #endif 194 195 for (i = 1; i < argc; ++i) 196 if (strcmp(argv[i], "-npro") == 0) 197 break; 198 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') 199 profile_name = argv[i]; /* non-empty -P (set profile) */ 200 set_defaults(); 201 if (i >= argc) 202 set_profile(profile_name); 203 204 for (i = 1; i < argc; ++i) { 205 206 /* 207 * look thru args (if any) for changes to defaults 208 */ 209 if (argv[i][0] != '-') {/* no flag on parameter */ 210 if (input == NULL) { /* we must have the input file */ 211 in_name = argv[i]; /* remember name of input file */ 212 input = fopen(in_name, "r"); 213 if (input == NULL) /* check for open error */ 214 err(1, "%s", in_name); 215 continue; 216 } 217 else if (output == NULL) { /* we have the output file */ 218 out_name = argv[i]; /* remember name of output file */ 219 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 220 * the file */ 221 errx(1, "input and output files must be different"); 222 } 223 output = fopen(out_name, "w"); 224 if (output == NULL) /* check for create error */ 225 err(1, "%s", out_name); 226 continue; 227 } 228 errx(1, "unknown parameter: %s", argv[i]); 229 } 230 else 231 set_option(argv[i]); 232 } /* end of for */ 233 if (input == NULL) 234 input = stdin; 235 if (output == NULL) { 236 if (input == stdin) 237 output = stdout; 238 else { 239 out_name = in_name; 240 bakcopy(); 241 } 242 } 243 244 /* Restrict input/output descriptors and enter Capsicum sandbox. */ 245 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); 246 if (cap_rights_limit(fileno(output), &rights) < 0 && errno != ENOSYS) 247 err(EXIT_FAILURE, "unable to limit rights for %s", out_name); 248 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 249 if (cap_rights_limit(fileno(input), &rights) < 0 && errno != ENOSYS) 250 err(EXIT_FAILURE, "unable to limit rights for %s", in_name); 251 if (cap_enter() < 0 && errno != ENOSYS) 252 err(EXIT_FAILURE, "unable to enter capability mode"); 253 254 if (ps.com_ind <= 1) 255 ps.com_ind = 2; /* dont put normal comments before column 2 */ 256 if (block_comment_max_col <= 0) 257 block_comment_max_col = max_col; 258 if (ps.local_decl_indent < 0) /* if not specified by user, set this */ 259 ps.local_decl_indent = ps.decl_indent; 260 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ 261 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; 262 if (continuation_indent == 0) 263 continuation_indent = ps.ind_size; 264 fill_buffer(); /* get first batch of stuff into input buffer */ 265 266 parse(semicolon); 267 { 268 char *p = buf_ptr; 269 int col = 1; 270 271 while (1) { 272 if (*p == ' ') 273 col++; 274 else if (*p == '\t') 275 col = tabsize * (1 + (col - 1) / tabsize) + 1; 276 else 277 break; 278 p++; 279 } 280 if (col > ps.ind_size) 281 ps.ind_level = ps.i_l_follow = col / ps.ind_size; 282 } 283 284 /* 285 * START OF MAIN LOOP 286 */ 287 288 while (1) { /* this is the main loop. it will go until we 289 * reach eof */ 290 int comment_buffered = false; 291 292 type_code = lexi(&ps); /* lexi reads one token. The actual 293 * characters read are stored in "token". lexi 294 * returns a code indicating the type of token */ 295 296 /* 297 * The following code moves newlines and comments following an if (), 298 * while (), else, etc. up to the start of the following stmt to 299 * a buffer. This allows proper handling of both kinds of brace 300 * placement (-br, -bl) and cuddling "else" (-ce). 301 */ 302 303 while (ps.search_brace) { 304 switch (type_code) { 305 case newline: 306 if (sc_end == NULL) { 307 save_com = sc_buf; 308 save_com[0] = save_com[1] = ' '; 309 sc_end = &save_com[2]; 310 } 311 *sc_end++ = '\n'; 312 /* 313 * We may have inherited a force_nl == true from the previous 314 * token (like a semicolon). But once we know that a newline 315 * has been scanned in this loop, force_nl should be false. 316 * 317 * However, the force_nl == true must be preserved if newline 318 * is never scanned in this loop, so this assignment cannot be 319 * done earlier. 320 */ 321 force_nl = false; 322 case form_feed: 323 break; 324 case comment: 325 if (sc_end == NULL) { 326 /* 327 * Copy everything from the start of the line, because 328 * pr_comment() will use that to calculate original 329 * indentation of a boxed comment. 330 */ 331 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4); 332 save_com = sc_buf + (buf_ptr - in_buffer - 4); 333 save_com[0] = save_com[1] = ' '; 334 sc_end = &save_com[2]; 335 } 336 comment_buffered = true; 337 *sc_end++ = '/'; /* copy in start of comment */ 338 *sc_end++ = '*'; 339 for (;;) { /* loop until we get to the end of the comment */ 340 *sc_end = *buf_ptr++; 341 if (buf_ptr >= buf_end) 342 fill_buffer(); 343 if (*sc_end++ == '*' && *buf_ptr == '/') 344 break; /* we are at end of comment */ 345 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer 346 * overflow */ 347 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); 348 fflush(output); 349 exit(1); 350 } 351 } 352 *sc_end++ = '/'; /* add ending slash */ 353 if (++buf_ptr >= buf_end) /* get past / in buffer */ 354 fill_buffer(); 355 break; 356 case lbrace: 357 /* 358 * Put KNF-style lbraces before the buffered up tokens and 359 * jump out of this loop in order to avoid copying the token 360 * again under the default case of the switch below. 361 */ 362 if (sc_end != NULL && btype_2) { 363 save_com[0] = '{'; 364 /* 365 * Originally the lbrace may have been alone on its own 366 * line, but it will be moved into "the else's line", so 367 * if there was a newline resulting from the "{" before, 368 * it must be scanned now and ignored. 369 */ 370 while (isspace((unsigned char)*buf_ptr)) { 371 if (++buf_ptr >= buf_end) 372 fill_buffer(); 373 if (*buf_ptr == '\n') 374 break; 375 } 376 goto sw_buffer; 377 } 378 /* FALLTHROUGH */ 379 default: /* it is the start of a normal statement */ 380 { 381 int remove_newlines; 382 383 remove_newlines = 384 /* "} else" */ 385 (type_code == sp_nparen && *token == 'e' && 386 e_code != s_code && e_code[-1] == '}') 387 /* "else if" */ 388 || (type_code == sp_paren && *token == 'i' && 389 last_else && ps.else_if); 390 if (remove_newlines) 391 force_nl = false; 392 if (sc_end == NULL) { /* ignore buffering if 393 * comment wasn't saved up */ 394 ps.search_brace = false; 395 goto check_type; 396 } 397 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) { 398 sc_end--; 399 } 400 if (swallow_optional_blanklines || 401 (!comment_buffered && remove_newlines)) { 402 force_nl = !remove_newlines; 403 while (sc_end > save_com && sc_end[-1] == '\n') { 404 sc_end--; 405 } 406 } 407 if (force_nl) { /* if we should insert a nl here, put 408 * it into the buffer */ 409 force_nl = false; 410 --line_no; /* this will be re-increased when the 411 * newline is read from the buffer */ 412 *sc_end++ = '\n'; 413 *sc_end++ = ' '; 414 if (verbose) /* print error msg if the line was 415 * not already broken */ 416 diag2(0, "Line broken"); 417 } 418 for (t_ptr = token; *t_ptr; ++t_ptr) 419 *sc_end++ = *t_ptr; 420 421 sw_buffer: 422 ps.search_brace = false; /* stop looking for start of 423 * stmt */ 424 bp_save = buf_ptr; /* save current input buffer */ 425 be_save = buf_end; 426 buf_ptr = save_com; /* fix so that subsequent calls to 427 * lexi will take tokens out of 428 * save_com */ 429 *sc_end++ = ' ';/* add trailing blank, just in case */ 430 buf_end = sc_end; 431 sc_end = NULL; 432 break; 433 } 434 } /* end of switch */ 435 /* 436 * We must make this check, just in case there was an unexpected 437 * EOF. 438 */ 439 if (type_code != 0) { 440 /* 441 * The only intended purpose of calling lexi() below is to 442 * categorize the next token in order to decide whether to 443 * continue buffering forthcoming tokens. Once the buffering 444 * is over, lexi() will be called again elsewhere on all of 445 * the tokens - this time for normal processing. 446 * 447 * Calling it for this purpose is a bug, because lexi() also 448 * changes the parser state and discards leading whitespace, 449 * which is needed mostly for comment-related considerations. 450 * 451 * Work around the former problem by giving lexi() a copy of 452 * the current parser state and discard it if the call turned 453 * out to be just a look ahead. 454 * 455 * Work around the latter problem by copying all whitespace 456 * characters into the buffer so that the later lexi() call 457 * will read them. 458 */ 459 if (sc_end != NULL) { 460 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 461 *sc_end++ = *buf_ptr++; 462 if (sc_end >= &save_com[sc_size]) { 463 errx(1, "input too long"); 464 } 465 } 466 if (buf_ptr >= buf_end) { 467 fill_buffer(); 468 } 469 } 470 transient_state = ps; 471 type_code = lexi(&transient_state); /* read another token */ 472 if (type_code != newline && type_code != form_feed && 473 type_code != comment && !transient_state.search_brace) { 474 ps = transient_state; 475 } 476 } 477 } /* end of while (search_brace) */ 478 last_else = 0; 479 check_type: 480 if (type_code == 0) { /* we got eof */ 481 if (s_lab != e_lab || s_code != e_code 482 || s_com != e_com) /* must dump end of line */ 483 dump_line(); 484 if (ps.tos > 1) /* check for balanced braces */ 485 diag2(1, "Stuff missing from end of file"); 486 487 if (verbose) { 488 printf("There were %d output lines and %d comments\n", 489 ps.out_lines, ps.out_coms); 490 printf("(Lines with comments)/(Lines with code): %6.3f\n", 491 (1.0 * ps.com_lines) / code_lines); 492 } 493 fflush(output); 494 exit(found_err); 495 } 496 if ( 497 (type_code != comment) && 498 (type_code != newline) && 499 (type_code != preesc) && 500 (type_code != form_feed)) { 501 if (force_nl && 502 (type_code != semicolon) && 503 (type_code != lbrace || !btype_2)) { 504 /* we should force a broken line here */ 505 if (verbose) 506 diag2(0, "Line broken"); 507 dump_line(); 508 ps.want_blank = false; /* dont insert blank at line start */ 509 force_nl = false; 510 } 511 ps.in_stmt = true; /* turn on flag which causes an extra level of 512 * indentation. this is turned off by a ; or 513 * '}' */ 514 if (s_com != e_com) { /* the turkey has embedded a comment 515 * in a line. fix it */ 516 int len = e_com - s_com; 517 518 CHECK_SIZE_CODE(len + 3); 519 *e_code++ = ' '; 520 memcpy(e_code, s_com, len); 521 e_code += len; 522 *e_code++ = ' '; 523 *e_code = '\0'; /* null terminate code sect */ 524 ps.want_blank = false; 525 e_com = s_com; 526 } 527 } 528 else if (type_code != comment) /* preserve force_nl thru a comment */ 529 force_nl = false; /* cancel forced newline after newline, form 530 * feed, etc */ 531 532 533 534 /*-----------------------------------------------------*\ 535 | do switch on type of token scanned | 536 \*-----------------------------------------------------*/ 537 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code 538 * before the next CHECK_SIZE_CODE or 539 * dump_line() is 2. After that there's the 540 * final increment for the null character. */ 541 switch (type_code) { /* now, decide what to do with the token */ 542 543 case form_feed: /* found a form feed in line */ 544 ps.use_ff = true; /* a form feed is treated much like a newline */ 545 dump_line(); 546 ps.want_blank = false; 547 break; 548 549 case newline: 550 if (ps.last_token != comma || ps.p_l_follow > 0 551 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 552 dump_line(); 553 ps.want_blank = false; 554 } 555 ++line_no; /* keep track of input line number */ 556 break; 557 558 case lparen: /* got a '(' or '[' */ 559 /* count parens to make Healy happy */ 560 if (++ps.p_l_follow == nitems(ps.paren_indents)) { 561 diag3(0, "Reached internal limit of %d unclosed parens", 562 nitems(ps.paren_indents)); 563 ps.p_l_follow--; 564 } 565 if (*token == '[') 566 /* not a function pointer declaration or a function call */; 567 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && 568 ps.procname[0] == '\0' && ps.paren_level == 0) { 569 /* function pointer declarations */ 570 indent_declaration(dec_ind, tabs_to_var); 571 ps.dumped_decl_indent = true; 572 } 573 else if (ps.want_blank && 574 ((ps.last_token != ident && ps.last_token != funcname) || 575 proc_calls_space || 576 /* offsetof (1) is never allowed a space; sizeof (2) gets 577 * one iff -bs; all other keywords (>2) always get a space 578 * before lparen */ 579 ps.keyword + Bill_Shannon > 2)) 580 *e_code++ = ' '; 581 ps.want_blank = false; 582 *e_code++ = token[0]; 583 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1; 584 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent 585 && ps.paren_indents[0] < 2 * ps.ind_size) 586 ps.paren_indents[0] = 2 * ps.ind_size; 587 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 588 /* 589 * this is a kluge to make sure that declarations will be 590 * aligned right if proc decl has an explicit type on it, i.e. 591 * "int a(x) {..." 592 */ 593 parse(semicolon); /* I said this was a kluge... */ 594 ps.in_or_st = false; /* turn off flag for structure decl or 595 * initialization */ 596 } 597 /* parenthesized type following sizeof or offsetof is not a cast */ 598 if (ps.keyword == 1 || ps.keyword == 2) 599 ps.not_cast_mask |= 1 << ps.p_l_follow; 600 break; 601 602 case rparen: /* got a ')' or ']' */ 603 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { 604 ps.last_u_d = true; 605 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 606 ps.want_blank = space_after_cast; 607 } else 608 ps.want_blank = true; 609 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; 610 if (--ps.p_l_follow < 0) { 611 ps.p_l_follow = 0; 612 diag3(0, "Extra %c", *token); 613 } 614 if (e_code == s_code) /* if the paren starts the line */ 615 ps.paren_level = ps.p_l_follow; /* then indent it */ 616 617 *e_code++ = token[0]; 618 619 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 620 * (...), or some such */ 621 sp_sw = false; 622 force_nl = true;/* must force newline after if */ 623 ps.last_u_d = true; /* inform lexi that a following 624 * operator is unary */ 625 ps.in_stmt = false; /* dont use stmt continuation 626 * indentation */ 627 628 parse(hd_type); /* let parser worry about if, or whatever */ 629 } 630 ps.search_brace = btype_2; /* this should insure that constructs 631 * such as main(){...} and int[]{...} 632 * have their braces put in the right 633 * place */ 634 break; 635 636 case unary_op: /* this could be any unary operation */ 637 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init && 638 ps.procname[0] == '\0' && ps.paren_level == 0) { 639 /* pointer declarations */ 640 641 /* 642 * if this is a unary op in a declaration, we should indent 643 * this token 644 */ 645 for (i = 0; token[i]; ++i) 646 /* find length of token */; 647 indent_declaration(dec_ind - i, tabs_to_var); 648 ps.dumped_decl_indent = true; 649 } 650 else if (ps.want_blank) 651 *e_code++ = ' '; 652 653 { 654 int len = e_token - s_token; 655 656 CHECK_SIZE_CODE(len); 657 memcpy(e_code, token, len); 658 e_code += len; 659 } 660 ps.want_blank = false; 661 break; 662 663 case binary_op: /* any binary operation */ 664 { 665 int len = e_token - s_token; 666 667 CHECK_SIZE_CODE(len + 1); 668 if (ps.want_blank) 669 *e_code++ = ' '; 670 memcpy(e_code, token, len); 671 e_code += len; 672 } 673 ps.want_blank = true; 674 break; 675 676 case postop: /* got a trailing ++ or -- */ 677 *e_code++ = token[0]; 678 *e_code++ = token[1]; 679 ps.want_blank = true; 680 break; 681 682 case question: /* got a ? */ 683 squest++; /* this will be used when a later colon 684 * appears so we can distinguish the 685 * <c>?<n>:<n> construct */ 686 if (ps.want_blank) 687 *e_code++ = ' '; 688 *e_code++ = '?'; 689 ps.want_blank = true; 690 break; 691 692 case casestmt: /* got word 'case' or 'default' */ 693 scase = true; /* so we can process the later colon properly */ 694 goto copy_id; 695 696 case colon: /* got a ':' */ 697 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 698 --squest; 699 if (ps.want_blank) 700 *e_code++ = ' '; 701 *e_code++ = ':'; 702 ps.want_blank = true; 703 break; 704 } 705 if (ps.in_or_st) { 706 *e_code++ = ':'; 707 ps.want_blank = false; 708 break; 709 } 710 ps.in_stmt = false; /* seeing a label does not imply we are in a 711 * stmt */ 712 /* 713 * turn everything so far into a label 714 */ 715 { 716 int len = e_code - s_code; 717 718 CHECK_SIZE_LAB(len + 3); 719 memcpy(e_lab, s_code, len); 720 e_lab += len; 721 *e_lab++ = ':'; 722 *e_lab = '\0'; 723 e_code = s_code; 724 } 725 force_nl = ps.pcase = scase; /* ps.pcase will be used by 726 * dump_line to decide how to 727 * indent the label. force_nl 728 * will force a case n: to be 729 * on a line by itself */ 730 scase = false; 731 ps.want_blank = false; 732 break; 733 734 case semicolon: /* got a ';' */ 735 if (ps.dec_nest == 0) 736 ps.in_or_st = false;/* we are not in an initialization or 737 * structure declaration */ 738 scase = false; /* these will only need resetting in an error */ 739 squest = 0; 740 if (ps.last_token == rparen) 741 ps.in_parameter_declaration = 0; 742 ps.cast_mask = 0; 743 ps.not_cast_mask = 0; 744 ps.block_init = 0; 745 ps.block_init_level = 0; 746 ps.just_saw_decl--; 747 748 if (ps.in_decl && s_code == e_code && !ps.block_init && 749 !ps.dumped_decl_indent && ps.paren_level == 0) { 750 /* indent stray semicolons in declarations */ 751 indent_declaration(dec_ind - 1, tabs_to_var); 752 ps.dumped_decl_indent = true; 753 } 754 755 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 756 * structure declaration, we 757 * arent any more */ 758 759 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 760 761 /* 762 * This should be true iff there were unbalanced parens in the 763 * stmt. It is a bit complicated, because the semicolon might 764 * be in a for stmt 765 */ 766 diag2(1, "Unbalanced parens"); 767 ps.p_l_follow = 0; 768 if (sp_sw) { /* this is a check for an if, while, etc. with 769 * unbalanced parens */ 770 sp_sw = false; 771 parse(hd_type); /* dont lose the if, or whatever */ 772 } 773 } 774 *e_code++ = ';'; 775 ps.want_blank = true; 776 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 777 * middle of a stmt */ 778 779 if (!sp_sw) { /* if not if for (;;) */ 780 parse(semicolon); /* let parser know about end of stmt */ 781 force_nl = true;/* force newline after an end of stmt */ 782 } 783 break; 784 785 case lbrace: /* got a '{' */ 786 ps.in_stmt = false; /* dont indent the {} */ 787 if (!ps.block_init) 788 force_nl = true;/* force other stuff on same line as '{' onto 789 * new line */ 790 else if (ps.block_init_level <= 0) 791 ps.block_init_level = 1; 792 else 793 ps.block_init_level++; 794 795 if (s_code != e_code && !ps.block_init) { 796 if (!btype_2) { 797 dump_line(); 798 ps.want_blank = false; 799 } 800 else if (ps.in_parameter_declaration && !ps.in_or_st) { 801 ps.i_l_follow = 0; 802 if (function_brace_split) { /* dump the line prior to the 803 * brace ... */ 804 dump_line(); 805 ps.want_blank = false; 806 } else /* add a space between the decl and brace */ 807 ps.want_blank = true; 808 } 809 } 810 if (ps.in_parameter_declaration) 811 prefix_blankline_requested = 0; 812 813 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 814 * parens */ 815 diag2(1, "Unbalanced parens"); 816 ps.p_l_follow = 0; 817 if (sp_sw) { /* check for unclosed if, for, etc. */ 818 sp_sw = false; 819 parse(hd_type); 820 ps.ind_level = ps.i_l_follow; 821 } 822 } 823 if (s_code == e_code) 824 ps.ind_stmt = false; /* dont put extra indentation on line 825 * with '{' */ 826 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 827 * declaration or an init */ 828 di_stack[ps.dec_nest] = dec_ind; 829 if (++ps.dec_nest == nitems(di_stack)) { 830 diag3(0, "Reached internal limit of %d struct levels", 831 nitems(di_stack)); 832 ps.dec_nest--; 833 } 834 /* ? dec_ind = 0; */ 835 } 836 else { 837 ps.decl_on_line = false; /* we can't be in the middle of 838 * a declaration, so don't do 839 * special indentation of 840 * comments */ 841 if (blanklines_after_declarations_at_proctop 842 && ps.in_parameter_declaration) 843 postfix_blankline_requested = 1; 844 ps.in_parameter_declaration = 0; 845 ps.in_decl = false; 846 } 847 dec_ind = 0; 848 parse(lbrace); /* let parser know about this */ 849 if (ps.want_blank) /* put a blank before '{' if '{' is not at 850 * start of line */ 851 *e_code++ = ' '; 852 ps.want_blank = false; 853 *e_code++ = '{'; 854 ps.just_saw_decl = 0; 855 break; 856 857 case rbrace: /* got a '}' */ 858 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 859 * omitted in 860 * declarations */ 861 parse(semicolon); 862 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 863 diag2(1, "Unbalanced parens"); 864 ps.p_l_follow = 0; 865 sp_sw = false; 866 } 867 ps.just_saw_decl = 0; 868 ps.block_init_level--; 869 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 870 * line */ 871 if (verbose) 872 diag2(0, "Line broken"); 873 dump_line(); 874 } 875 *e_code++ = '}'; 876 ps.want_blank = true; 877 ps.in_stmt = ps.ind_stmt = false; 878 if (ps.dec_nest > 0) { /* we are in multi-level structure 879 * declaration */ 880 dec_ind = di_stack[--ps.dec_nest]; 881 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 882 ps.just_saw_decl = 2; 883 ps.in_decl = true; 884 } 885 prefix_blankline_requested = 0; 886 parse(rbrace); /* let parser know about this */ 887 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead 888 && ps.il[ps.tos] >= ps.ind_level; 889 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) 890 postfix_blankline_requested = 1; 891 break; 892 893 case swstmt: /* got keyword "switch" */ 894 sp_sw = true; 895 hd_type = swstmt; /* keep this for when we have seen the 896 * expression */ 897 goto copy_id; /* go move the token into buffer */ 898 899 case sp_paren: /* token is if, while, for */ 900 sp_sw = true; /* the interesting stuff is done after the 901 * expression is scanned */ 902 hd_type = (*token == 'i' ? ifstmt : 903 (*token == 'w' ? whilestmt : forstmt)); 904 905 /* 906 * remember the type of header for later use by parser 907 */ 908 goto copy_id; /* copy the token into line */ 909 910 case sp_nparen: /* got else, do */ 911 ps.in_stmt = false; 912 if (*token == 'e') { 913 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { 914 if (verbose) 915 diag2(0, "Line broken"); 916 dump_line();/* make sure this starts a line */ 917 ps.want_blank = false; 918 } 919 force_nl = true;/* also, following stuff must go onto new line */ 920 last_else = 1; 921 parse(elselit); 922 } 923 else { 924 if (e_code != s_code) { /* make sure this starts a line */ 925 if (verbose) 926 diag2(0, "Line broken"); 927 dump_line(); 928 ps.want_blank = false; 929 } 930 force_nl = true;/* also, following stuff must go onto new line */ 931 last_else = 0; 932 parse(dolit); 933 } 934 goto copy_id; /* move the token into line */ 935 936 case type_def: 937 case storage: 938 prefix_blankline_requested = 0; 939 goto copy_id; 940 941 case structure: 942 if (ps.p_l_follow > 0) 943 goto copy_id; 944 case decl: /* we have a declaration type (int, etc.) */ 945 parse(decl); /* let parser worry about indentation */ 946 if (ps.last_token == rparen && ps.tos <= 1) { 947 if (s_code != e_code) { 948 dump_line(); 949 ps.want_blank = 0; 950 } 951 } 952 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { 953 ps.ind_level = ps.i_l_follow = 1; 954 ps.ind_stmt = 0; 955 } 956 ps.in_or_st = true; /* this might be a structure or initialization 957 * declaration */ 958 ps.in_decl = ps.decl_on_line = ps.last_token != type_def; 959 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 960 ps.just_saw_decl = 2; 961 prefix_blankline_requested = 0; 962 for (i = 0; token[i++];); /* get length of token */ 963 964 if (ps.ind_level == 0 || ps.dec_nest > 0) { 965 /* global variable or struct member in local variable */ 966 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; 967 tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); 968 } else { 969 /* local variable */ 970 dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; 971 tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); 972 } 973 goto copy_id; 974 975 case funcname: 976 case ident: /* got an identifier or constant */ 977 if (ps.in_decl) { 978 if (type_code == funcname) { 979 ps.in_decl = false; 980 if (procnames_start_line && s_code != e_code) { 981 *e_code = '\0'; 982 dump_line(); 983 } 984 else if (ps.want_blank) { 985 *e_code++ = ' '; 986 } 987 ps.want_blank = false; 988 } 989 else if (!ps.block_init && !ps.dumped_decl_indent && 990 ps.paren_level == 0) { /* if we are in a declaration, we 991 * must indent identifier */ 992 indent_declaration(dec_ind, tabs_to_var); 993 ps.dumped_decl_indent = true; 994 ps.want_blank = false; 995 } 996 } 997 else if (sp_sw && ps.p_l_follow == 0) { 998 sp_sw = false; 999 force_nl = true; 1000 ps.last_u_d = true; 1001 ps.in_stmt = false; 1002 parse(hd_type); 1003 } 1004 copy_id: 1005 { 1006 int len = e_token - s_token; 1007 1008 CHECK_SIZE_CODE(len + 1); 1009 if (ps.want_blank) 1010 *e_code++ = ' '; 1011 memcpy(e_code, s_token, len); 1012 e_code += len; 1013 } 1014 if (type_code != funcname) 1015 ps.want_blank = true; 1016 break; 1017 1018 case strpfx: 1019 { 1020 int len = e_token - s_token; 1021 1022 CHECK_SIZE_CODE(len + 1); 1023 if (ps.want_blank) 1024 *e_code++ = ' '; 1025 memcpy(e_code, token, len); 1026 e_code += len; 1027 } 1028 ps.want_blank = false; 1029 break; 1030 1031 case period: /* treat a period kind of like a binary 1032 * operation */ 1033 *e_code++ = '.'; /* move the period into line */ 1034 ps.want_blank = false; /* dont put a blank after a period */ 1035 break; 1036 1037 case comma: 1038 ps.want_blank = (s_code != e_code); /* only put blank after comma 1039 * if comma does not start the 1040 * line */ 1041 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init && 1042 !ps.dumped_decl_indent && ps.paren_level == 0) { 1043 /* indent leading commas and not the actual identifiers */ 1044 indent_declaration(dec_ind - 1, tabs_to_var); 1045 ps.dumped_decl_indent = true; 1046 } 1047 *e_code++ = ','; 1048 if (ps.p_l_follow == 0) { 1049 if (ps.block_init_level <= 0) 1050 ps.block_init = 0; 1051 if (break_comma && (!ps.leave_comma || 1052 count_spaces_until(compute_code_target(), s_code, e_code) > 1053 max_col - tabsize)) 1054 force_nl = true; 1055 } 1056 break; 1057 1058 case preesc: /* got the character '#' */ 1059 if ((s_com != e_com) || 1060 (s_lab != e_lab) || 1061 (s_code != e_code)) 1062 dump_line(); 1063 CHECK_SIZE_LAB(1); 1064 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1065 { 1066 int in_comment = 0; 1067 int com_start = 0; 1068 char quote = 0; 1069 int com_end = 0; 1070 1071 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1072 buf_ptr++; 1073 if (buf_ptr >= buf_end) 1074 fill_buffer(); 1075 } 1076 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1077 CHECK_SIZE_LAB(2); 1078 *e_lab = *buf_ptr++; 1079 if (buf_ptr >= buf_end) 1080 fill_buffer(); 1081 switch (*e_lab++) { 1082 case BACKSLASH: 1083 if (!in_comment) { 1084 *e_lab++ = *buf_ptr++; 1085 if (buf_ptr >= buf_end) 1086 fill_buffer(); 1087 } 1088 break; 1089 case '/': 1090 if (*buf_ptr == '*' && !in_comment && !quote) { 1091 in_comment = 1; 1092 *e_lab++ = *buf_ptr++; 1093 com_start = e_lab - s_lab - 2; 1094 } 1095 break; 1096 case '"': 1097 if (quote == '"') 1098 quote = 0; 1099 break; 1100 case '\'': 1101 if (quote == '\'') 1102 quote = 0; 1103 break; 1104 case '*': 1105 if (*buf_ptr == '/' && in_comment) { 1106 in_comment = 0; 1107 *e_lab++ = *buf_ptr++; 1108 com_end = e_lab - s_lab; 1109 } 1110 break; 1111 } 1112 } 1113 1114 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1115 e_lab--; 1116 if (e_lab - s_lab == com_end && bp_save == NULL) { 1117 /* comment on preprocessor line */ 1118 if (sc_end == NULL) { /* if this is the first comment, 1119 * we must set up the buffer */ 1120 save_com = sc_buf; 1121 sc_end = &save_com[0]; 1122 } 1123 else { 1124 *sc_end++ = '\n'; /* add newline between 1125 * comments */ 1126 *sc_end++ = ' '; 1127 --line_no; 1128 } 1129 if (sc_end - save_com + com_end - com_start > sc_size) 1130 errx(1, "input too long"); 1131 memmove(sc_end, s_lab + com_start, com_end - com_start); 1132 sc_end += com_end - com_start; 1133 e_lab = s_lab + com_start; 1134 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1135 e_lab--; 1136 bp_save = buf_ptr; /* save current input buffer */ 1137 be_save = buf_end; 1138 buf_ptr = save_com; /* fix so that subsequent calls to 1139 * lexi will take tokens out of 1140 * save_com */ 1141 *sc_end++ = ' '; /* add trailing blank, just in case */ 1142 buf_end = sc_end; 1143 sc_end = NULL; 1144 } 1145 CHECK_SIZE_LAB(1); 1146 *e_lab = '\0'; /* null terminate line */ 1147 ps.pcase = false; 1148 } 1149 1150 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ 1151 if ((size_t)ifdef_level < nitems(state_stack)) { 1152 match_state[ifdef_level].tos = -1; 1153 state_stack[ifdef_level++] = ps; 1154 } 1155 else 1156 diag2(1, "#if stack overflow"); 1157 } 1158 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ 1159 if (ifdef_level <= 0) 1160 diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); 1161 else { 1162 match_state[ifdef_level - 1] = ps; 1163 ps = state_stack[ifdef_level - 1]; 1164 } 1165 } 1166 else if (strncmp(s_lab, "#endif", 6) == 0) { 1167 if (ifdef_level <= 0) 1168 diag2(1, "Unmatched #endif"); 1169 else 1170 ifdef_level--; 1171 } else { 1172 struct directives { 1173 int size; 1174 const char *string; 1175 } 1176 recognized[] = { 1177 {7, "include"}, 1178 {6, "define"}, 1179 {5, "undef"}, 1180 {4, "line"}, 1181 {5, "error"}, 1182 {6, "pragma"} 1183 }; 1184 int d = nitems(recognized); 1185 while (--d >= 0) 1186 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) 1187 break; 1188 if (d < 0) { 1189 diag2(1, "Unrecognized cpp directive"); 1190 break; 1191 } 1192 } 1193 if (blanklines_around_conditional_compilation) { 1194 postfix_blankline_requested++; 1195 n_real_blanklines = 0; 1196 } 1197 else { 1198 postfix_blankline_requested = 0; 1199 prefix_blankline_requested = 0; 1200 } 1201 break; /* subsequent processing of the newline 1202 * character will cause the line to be printed */ 1203 1204 case comment: /* we have gotten a / followed by * this is a biggie */ 1205 pr_comment(); 1206 break; 1207 } /* end of big switch stmt */ 1208 1209 *e_code = '\0'; /* make sure code section is null terminated */ 1210 if (type_code != comment && type_code != newline && type_code != preesc) 1211 ps.last_token = type_code; 1212 } /* end of main while (1) loop */ 1213 } 1214 1215 /* 1216 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1217 * backup file will be ".Bfile" then make the backup file the input and 1218 * original input file the output 1219 */ 1220 static void 1221 bakcopy(void) 1222 { 1223 int n, 1224 bakchn; 1225 char buff[8 * 1024]; 1226 const char *p; 1227 1228 /* construct file name .Bfile */ 1229 for (p = in_name; *p; p++); /* skip to end of string */ 1230 while (p > in_name && *p != '/') /* find last '/' */ 1231 p--; 1232 if (*p == '/') 1233 p++; 1234 sprintf(bakfile, "%s%s", p, simple_backup_suffix); 1235 1236 /* copy in_name to backup file */ 1237 bakchn = creat(bakfile, 0600); 1238 if (bakchn < 0) 1239 err(1, "%s", bakfile); 1240 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 1241 if (write(bakchn, buff, n) != n) 1242 err(1, "%s", bakfile); 1243 if (n < 0) 1244 err(1, "%s", in_name); 1245 close(bakchn); 1246 fclose(input); 1247 1248 /* re-open backup file as the input file */ 1249 input = fopen(bakfile, "r"); 1250 if (input == NULL) 1251 err(1, "%s", bakfile); 1252 /* now the original input file will be the output */ 1253 output = fopen(in_name, "w"); 1254 if (output == NULL) { 1255 unlink(bakfile); 1256 err(1, "%s", in_name); 1257 } 1258 } 1259 1260 static void 1261 indent_declaration(int cur_dec_ind, int tabs_to_var) 1262 { 1263 int pos = e_code - s_code; 1264 char *startpos = e_code; 1265 1266 /* 1267 * get the tab math right for indentations that are not multiples of tabsize 1268 */ 1269 if ((ps.ind_level * ps.ind_size) % tabsize != 0) { 1270 pos += (ps.ind_level * ps.ind_size) % tabsize; 1271 cur_dec_ind += (ps.ind_level * ps.ind_size) % tabsize; 1272 } 1273 if (tabs_to_var) { 1274 int tpos; 1275 1276 CHECK_SIZE_CODE(cur_dec_ind / tabsize); 1277 while ((tpos = tabsize * (1 + pos / tabsize)) <= cur_dec_ind) { 1278 *e_code++ = '\t'; 1279 pos = tpos; 1280 } 1281 } 1282 CHECK_SIZE_CODE(cur_dec_ind - pos + 1); 1283 while (pos < cur_dec_ind) { 1284 *e_code++ = ' '; 1285 pos++; 1286 } 1287 if (e_code == startpos && ps.want_blank) { 1288 *e_code++ = ' '; 1289 ps.want_blank = false; 1290 } 1291 } 1292