1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 6 * Copyright (c) 1980, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 static const char copyright[] = 40 "@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\ 41 @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\ 42 @(#) Copyright (c) 1980, 1993\n\ 43 The Regents of the University of California. All rights reserved.\n"; 44 #endif /* not lint */ 45 46 #if 0 47 #ifndef lint 48 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 49 #endif /* not lint */ 50 #endif 51 52 #include <sys/cdefs.h> 53 __FBSDID("$FreeBSD$"); 54 55 #include <sys/param.h> 56 #include <sys/capsicum.h> 57 #include <err.h> 58 #include <errno.h> 59 #include <fcntl.h> 60 #include <unistd.h> 61 #include <stdio.h> 62 #include <stdlib.h> 63 #include <string.h> 64 #include <ctype.h> 65 #include "indent_globs.h" 66 #include "indent_codes.h" 67 #include "indent.h" 68 69 static void bakcopy(void); 70 static void indent_declaration(int, int); 71 72 const char *in_name = "Standard Input"; /* will always point to name of input 73 * file */ 74 const char *out_name = "Standard Output"; /* will always point to name 75 * of output file */ 76 const char *simple_backup_suffix = ".BAK"; /* Suffix to use for backup 77 * files */ 78 char bakfile[MAXPATHLEN] = ""; 79 80 int 81 main(int argc, char **argv) 82 { 83 cap_rights_t rights; 84 85 int dec_ind; /* current indentation for declarations */ 86 int di_stack[20]; /* a stack of structure indentation levels */ 87 int force_nl; /* when true, code must be broken */ 88 int hd_type = 0; /* used to store type of stmt for if (...), 89 * for (...), etc */ 90 int i; /* local loop counter */ 91 int scase; /* set to true when we see a case, so we will 92 * know what to do with the following colon */ 93 int sp_sw; /* when true, we are in the expression of 94 * if(...), while(...), etc. */ 95 int squest; /* when this is positive, we have seen a ? 96 * without the matching : in a <c>?<s>:<s> 97 * construct */ 98 const char *t_ptr; /* used for copying tokens */ 99 int tabs_to_var; /* true if using tabs to indent to var name */ 100 int type_code; /* the type of token, returned by lexi */ 101 102 int last_else = 0; /* true iff last keyword was an else */ 103 const char *profile_name = NULL; 104 const char *envval = NULL; 105 struct parser_state transient_state; /* a copy for lookup */ 106 107 /*-----------------------------------------------*\ 108 | INITIALIZATION | 109 \*-----------------------------------------------*/ 110 111 found_err = 0; 112 113 ps.p_stack[0] = stmt; /* this is the parser's stack */ 114 ps.last_nl = true; /* this is true if the last thing scanned was 115 * a newline */ 116 ps.last_token = semicolon; 117 combuf = (char *) malloc(bufsize); 118 if (combuf == NULL) 119 err(1, NULL); 120 labbuf = (char *) malloc(bufsize); 121 if (labbuf == NULL) 122 err(1, NULL); 123 codebuf = (char *) malloc(bufsize); 124 if (codebuf == NULL) 125 err(1, NULL); 126 tokenbuf = (char *) malloc(bufsize); 127 if (tokenbuf == NULL) 128 err(1, NULL); 129 alloc_typenames(); 130 l_com = combuf + bufsize - 5; 131 l_lab = labbuf + bufsize - 5; 132 l_code = codebuf + bufsize - 5; 133 l_token = tokenbuf + bufsize - 5; 134 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 135 * comment buffers */ 136 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 137 ps.else_if = 1; /* Default else-if special processing to on */ 138 s_lab = e_lab = labbuf + 1; 139 s_code = e_code = codebuf + 1; 140 s_com = e_com = combuf + 1; 141 s_token = e_token = tokenbuf + 1; 142 143 in_buffer = (char *) malloc(10); 144 if (in_buffer == NULL) 145 err(1, NULL); 146 in_buffer_limit = in_buffer + 8; 147 buf_ptr = buf_end = in_buffer; 148 line_no = 1; 149 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 150 sp_sw = force_nl = false; 151 ps.in_or_st = false; 152 ps.bl_line = true; 153 dec_ind = 0; 154 di_stack[ps.dec_nest = 0] = 0; 155 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 156 157 scase = ps.pcase = false; 158 squest = 0; 159 sc_end = NULL; 160 bp_save = NULL; 161 be_save = NULL; 162 163 output = NULL; 164 tabs_to_var = 0; 165 166 envval = getenv("SIMPLE_BACKUP_SUFFIX"); 167 if (envval) 168 simple_backup_suffix = envval; 169 170 /*--------------------------------------------------*\ 171 | COMMAND LINE SCAN | 172 \*--------------------------------------------------*/ 173 174 #ifdef undef 175 max_col = 78; /* -l78 */ 176 lineup_to_parens = 1; /* -lp */ 177 lineup_to_parens_always = 0; /* -nlpl */ 178 ps.ljust_decl = 0; /* -ndj */ 179 ps.com_ind = 33; /* -c33 */ 180 star_comment_cont = 1; /* -sc */ 181 ps.ind_size = 8; /* -i8 */ 182 verbose = 0; 183 ps.decl_indent = 16; /* -di16 */ 184 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value 185 * by an arg, we will set this equal to 186 * ps.decl_ind */ 187 ps.indent_parameters = 1; /* -ip */ 188 ps.decl_com_ind = 0; /* if this is not set to some positive value 189 * by an arg, we will set this equal to 190 * ps.com_ind */ 191 btype_2 = 1; /* -br */ 192 cuddle_else = 1; /* -ce */ 193 ps.unindent_displace = 0; /* -d0 */ 194 ps.case_indent = 0; /* -cli0 */ 195 format_block_comments = 1; /* -fcb */ 196 format_col1_comments = 1; /* -fc1 */ 197 procnames_start_line = 1; /* -psl */ 198 proc_calls_space = 0; /* -npcs */ 199 comment_delimiter_on_blankline = 1; /* -cdb */ 200 ps.leave_comma = 1; /* -nbc */ 201 #endif 202 203 for (i = 1; i < argc; ++i) 204 if (strcmp(argv[i], "-npro") == 0) 205 break; 206 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') 207 profile_name = argv[i]; /* non-empty -P (set profile) */ 208 set_defaults(); 209 if (i >= argc) 210 set_profile(profile_name); 211 212 for (i = 1; i < argc; ++i) { 213 214 /* 215 * look thru args (if any) for changes to defaults 216 */ 217 if (argv[i][0] != '-') {/* no flag on parameter */ 218 if (input == NULL) { /* we must have the input file */ 219 in_name = argv[i]; /* remember name of input file */ 220 input = fopen(in_name, "r"); 221 if (input == NULL) /* check for open error */ 222 err(1, "%s", in_name); 223 continue; 224 } 225 else if (output == NULL) { /* we have the output file */ 226 out_name = argv[i]; /* remember name of output file */ 227 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 228 * the file */ 229 errx(1, "input and output files must be different"); 230 } 231 output = fopen(out_name, "w"); 232 if (output == NULL) /* check for create error */ 233 err(1, "%s", out_name); 234 continue; 235 } 236 errx(1, "unknown parameter: %s", argv[i]); 237 } 238 else 239 set_option(argv[i]); 240 } /* end of for */ 241 if (input == NULL) 242 input = stdin; 243 if (output == NULL) { 244 if (input == stdin) 245 output = stdout; 246 else { 247 out_name = in_name; 248 bakcopy(); 249 } 250 } 251 252 /* Restrict input/output descriptors and enter Capsicum sandbox. */ 253 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); 254 if (cap_rights_limit(fileno(output), &rights) < 0 && errno != ENOSYS) 255 err(EXIT_FAILURE, "unable to limit rights for %s", out_name); 256 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 257 if (cap_rights_limit(fileno(input), &rights) < 0 && errno != ENOSYS) 258 err(EXIT_FAILURE, "unable to limit rights for %s", in_name); 259 if (cap_enter() < 0 && errno != ENOSYS) 260 err(EXIT_FAILURE, "unable to enter capability mode"); 261 262 if (ps.com_ind <= 1) 263 ps.com_ind = 2; /* dont put normal comments before column 2 */ 264 if (block_comment_max_col <= 0) 265 block_comment_max_col = max_col; 266 if (ps.local_decl_indent < 0) /* if not specified by user, set this */ 267 ps.local_decl_indent = ps.decl_indent; 268 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ 269 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; 270 if (continuation_indent == 0) 271 continuation_indent = ps.ind_size; 272 fill_buffer(); /* get first batch of stuff into input buffer */ 273 274 parse(semicolon); 275 { 276 char *p = buf_ptr; 277 int col = 1; 278 279 while (1) { 280 if (*p == ' ') 281 col++; 282 else if (*p == '\t') 283 col = tabsize * (1 + (col - 1) / tabsize) + 1; 284 else 285 break; 286 p++; 287 } 288 if (col > ps.ind_size) 289 ps.ind_level = ps.i_l_follow = col / ps.ind_size; 290 } 291 292 /* 293 * START OF MAIN LOOP 294 */ 295 296 while (1) { /* this is the main loop. it will go until we 297 * reach eof */ 298 int comment_buffered = false; 299 300 type_code = lexi(&ps); /* lexi reads one token. The actual 301 * characters read are stored in "token". lexi 302 * returns a code indicating the type of token */ 303 304 /* 305 * The following code moves newlines and comments following an if (), 306 * while (), else, etc. up to the start of the following stmt to 307 * a buffer. This allows proper handling of both kinds of brace 308 * placement (-br, -bl) and cuddling "else" (-ce). 309 */ 310 311 while (ps.search_brace) { 312 switch (type_code) { 313 case newline: 314 if (sc_end == NULL) { 315 save_com = sc_buf; 316 save_com[0] = save_com[1] = ' '; 317 sc_end = &save_com[2]; 318 } 319 *sc_end++ = '\n'; 320 /* 321 * We may have inherited a force_nl == true from the previous 322 * token (like a semicolon). But once we know that a newline 323 * has been scanned in this loop, force_nl should be false. 324 * 325 * However, the force_nl == true must be preserved if newline 326 * is never scanned in this loop, so this assignment cannot be 327 * done earlier. 328 */ 329 force_nl = false; 330 case form_feed: 331 break; 332 case comment: 333 if (sc_end == NULL) { 334 /* 335 * Copy everything from the start of the line, because 336 * pr_comment() will use that to calculate original 337 * indentation of a boxed comment. 338 */ 339 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4); 340 save_com = sc_buf + (buf_ptr - in_buffer - 4); 341 save_com[0] = save_com[1] = ' '; 342 sc_end = &save_com[2]; 343 } 344 comment_buffered = true; 345 *sc_end++ = '/'; /* copy in start of comment */ 346 *sc_end++ = '*'; 347 for (;;) { /* loop until we get to the end of the comment */ 348 *sc_end = *buf_ptr++; 349 if (buf_ptr >= buf_end) 350 fill_buffer(); 351 if (*sc_end++ == '*' && *buf_ptr == '/') 352 break; /* we are at end of comment */ 353 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer 354 * overflow */ 355 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); 356 fflush(output); 357 exit(1); 358 } 359 } 360 *sc_end++ = '/'; /* add ending slash */ 361 if (++buf_ptr >= buf_end) /* get past / in buffer */ 362 fill_buffer(); 363 break; 364 case lbrace: 365 /* 366 * Put KNF-style lbraces before the buffered up tokens and 367 * jump out of this loop in order to avoid copying the token 368 * again under the default case of the switch below. 369 */ 370 if (sc_end != NULL && btype_2) { 371 save_com[0] = '{'; 372 /* 373 * Originally the lbrace may have been alone on its own 374 * line, but it will be moved into "the else's line", so 375 * if there was a newline resulting from the "{" before, 376 * it must be scanned now and ignored. 377 */ 378 while (isspace((unsigned char)*buf_ptr)) { 379 if (++buf_ptr >= buf_end) 380 fill_buffer(); 381 if (*buf_ptr == '\n') 382 break; 383 } 384 goto sw_buffer; 385 } 386 /* FALLTHROUGH */ 387 default: /* it is the start of a normal statement */ 388 { 389 int remove_newlines; 390 391 remove_newlines = 392 /* "} else" */ 393 (type_code == sp_nparen && *token == 'e' && 394 e_code != s_code && e_code[-1] == '}') 395 /* "else if" */ 396 || (type_code == sp_paren && *token == 'i' && 397 last_else && ps.else_if); 398 if (remove_newlines) 399 force_nl = false; 400 if (sc_end == NULL) { /* ignore buffering if 401 * comment wasn't saved up */ 402 ps.search_brace = false; 403 goto check_type; 404 } 405 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) { 406 sc_end--; 407 } 408 if (swallow_optional_blanklines || 409 (!comment_buffered && remove_newlines)) { 410 force_nl = !remove_newlines; 411 while (sc_end > save_com && sc_end[-1] == '\n') { 412 sc_end--; 413 } 414 } 415 if (force_nl) { /* if we should insert a nl here, put 416 * it into the buffer */ 417 force_nl = false; 418 --line_no; /* this will be re-increased when the 419 * newline is read from the buffer */ 420 *sc_end++ = '\n'; 421 *sc_end++ = ' '; 422 if (verbose) /* print error msg if the line was 423 * not already broken */ 424 diag2(0, "Line broken"); 425 } 426 for (t_ptr = token; *t_ptr; ++t_ptr) 427 *sc_end++ = *t_ptr; 428 429 sw_buffer: 430 ps.search_brace = false; /* stop looking for start of 431 * stmt */ 432 bp_save = buf_ptr; /* save current input buffer */ 433 be_save = buf_end; 434 buf_ptr = save_com; /* fix so that subsequent calls to 435 * lexi will take tokens out of 436 * save_com */ 437 *sc_end++ = ' ';/* add trailing blank, just in case */ 438 buf_end = sc_end; 439 sc_end = NULL; 440 break; 441 } 442 } /* end of switch */ 443 /* 444 * We must make this check, just in case there was an unexpected 445 * EOF. 446 */ 447 if (type_code != 0) { 448 /* 449 * The only intended purpose of calling lexi() below is to 450 * categorize the next token in order to decide whether to 451 * continue buffering forthcoming tokens. Once the buffering 452 * is over, lexi() will be called again elsewhere on all of 453 * the tokens - this time for normal processing. 454 * 455 * Calling it for this purpose is a bug, because lexi() also 456 * changes the parser state and discards leading whitespace, 457 * which is needed mostly for comment-related considerations. 458 * 459 * Work around the former problem by giving lexi() a copy of 460 * the current parser state and discard it if the call turned 461 * out to be just a look ahead. 462 * 463 * Work around the latter problem by copying all whitespace 464 * characters into the buffer so that the later lexi() call 465 * will read them. 466 */ 467 if (sc_end != NULL) { 468 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 469 *sc_end++ = *buf_ptr++; 470 if (sc_end >= &save_com[sc_size]) { 471 errx(1, "input too long"); 472 } 473 } 474 if (buf_ptr >= buf_end) { 475 fill_buffer(); 476 } 477 } 478 transient_state = ps; 479 type_code = lexi(&transient_state); /* read another token */ 480 if (type_code != newline && type_code != form_feed && 481 type_code != comment && !transient_state.search_brace) { 482 ps = transient_state; 483 } 484 } 485 } /* end of while (search_brace) */ 486 last_else = 0; 487 check_type: 488 if (type_code == 0) { /* we got eof */ 489 if (s_lab != e_lab || s_code != e_code 490 || s_com != e_com) /* must dump end of line */ 491 dump_line(); 492 if (ps.tos > 1) /* check for balanced braces */ 493 diag2(1, "Stuff missing from end of file"); 494 495 if (verbose) { 496 printf("There were %d output lines and %d comments\n", 497 ps.out_lines, ps.out_coms); 498 printf("(Lines with comments)/(Lines with code): %6.3f\n", 499 (1.0 * ps.com_lines) / code_lines); 500 } 501 fflush(output); 502 exit(found_err); 503 } 504 if ( 505 (type_code != comment) && 506 (type_code != newline) && 507 (type_code != preesc) && 508 (type_code != form_feed)) { 509 if (force_nl && 510 (type_code != semicolon) && 511 (type_code != lbrace || !btype_2)) { 512 /* we should force a broken line here */ 513 if (verbose) 514 diag2(0, "Line broken"); 515 dump_line(); 516 ps.want_blank = false; /* dont insert blank at line start */ 517 force_nl = false; 518 } 519 ps.in_stmt = true; /* turn on flag which causes an extra level of 520 * indentation. this is turned off by a ; or 521 * '}' */ 522 if (s_com != e_com) { /* the turkey has embedded a comment 523 * in a line. fix it */ 524 int len = e_com - s_com; 525 526 CHECK_SIZE_CODE(len + 3); 527 *e_code++ = ' '; 528 memcpy(e_code, s_com, len); 529 e_code += len; 530 *e_code++ = ' '; 531 *e_code = '\0'; /* null terminate code sect */ 532 ps.want_blank = false; 533 e_com = s_com; 534 } 535 } 536 else if (type_code != comment) /* preserve force_nl thru a comment */ 537 force_nl = false; /* cancel forced newline after newline, form 538 * feed, etc */ 539 540 541 542 /*-----------------------------------------------------*\ 543 | do switch on type of token scanned | 544 \*-----------------------------------------------------*/ 545 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code 546 * before the next CHECK_SIZE_CODE or 547 * dump_line() is 2. After that there's the 548 * final increment for the null character. */ 549 switch (type_code) { /* now, decide what to do with the token */ 550 551 case form_feed: /* found a form feed in line */ 552 ps.use_ff = true; /* a form feed is treated much like a newline */ 553 dump_line(); 554 ps.want_blank = false; 555 break; 556 557 case newline: 558 if (ps.last_token != comma || ps.p_l_follow > 0 559 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 560 dump_line(); 561 ps.want_blank = false; 562 } 563 ++line_no; /* keep track of input line number */ 564 break; 565 566 case lparen: /* got a '(' or '[' */ 567 /* count parens to make Healy happy */ 568 if (++ps.p_l_follow == nitems(ps.paren_indents)) { 569 diag3(0, "Reached internal limit of %d unclosed parens", 570 nitems(ps.paren_indents)); 571 ps.p_l_follow--; 572 } 573 if (*token == '[') 574 /* not a function pointer declaration or a function call */; 575 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && 576 ps.procname[0] == '\0' && ps.paren_level == 0) { 577 /* function pointer declarations */ 578 indent_declaration(dec_ind, tabs_to_var); 579 ps.dumped_decl_indent = true; 580 } 581 else if (ps.want_blank && 582 ((ps.last_token != ident && ps.last_token != funcname) || 583 proc_calls_space || 584 /* offsetof (1) is never allowed a space; sizeof (2) gets 585 * one iff -bs; all other keywords (>2) always get a space 586 * before lparen */ 587 ps.keyword + Bill_Shannon > 2)) 588 *e_code++ = ' '; 589 ps.want_blank = false; 590 *e_code++ = token[0]; 591 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1; 592 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent 593 && ps.paren_indents[0] < 2 * ps.ind_size) 594 ps.paren_indents[0] = 2 * ps.ind_size; 595 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 596 /* 597 * this is a kluge to make sure that declarations will be 598 * aligned right if proc decl has an explicit type on it, i.e. 599 * "int a(x) {..." 600 */ 601 parse(semicolon); /* I said this was a kluge... */ 602 ps.in_or_st = false; /* turn off flag for structure decl or 603 * initialization */ 604 } 605 /* parenthesized type following sizeof or offsetof is not a cast */ 606 if (ps.keyword == 1 || ps.keyword == 2) 607 ps.not_cast_mask |= 1 << ps.p_l_follow; 608 break; 609 610 case rparen: /* got a ')' or ']' */ 611 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { 612 ps.last_u_d = true; 613 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 614 ps.want_blank = space_after_cast; 615 } else 616 ps.want_blank = true; 617 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; 618 if (--ps.p_l_follow < 0) { 619 ps.p_l_follow = 0; 620 diag3(0, "Extra %c", *token); 621 } 622 if (e_code == s_code) /* if the paren starts the line */ 623 ps.paren_level = ps.p_l_follow; /* then indent it */ 624 625 *e_code++ = token[0]; 626 627 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 628 * (...), or some such */ 629 sp_sw = false; 630 force_nl = true;/* must force newline after if */ 631 ps.last_u_d = true; /* inform lexi that a following 632 * operator is unary */ 633 ps.in_stmt = false; /* dont use stmt continuation 634 * indentation */ 635 636 parse(hd_type); /* let parser worry about if, or whatever */ 637 } 638 ps.search_brace = btype_2; /* this should insure that constructs 639 * such as main(){...} and int[]{...} 640 * have their braces put in the right 641 * place */ 642 break; 643 644 case unary_op: /* this could be any unary operation */ 645 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init && 646 ps.procname[0] == '\0' && ps.paren_level == 0) { 647 /* pointer declarations */ 648 649 /* 650 * if this is a unary op in a declaration, we should indent 651 * this token 652 */ 653 for (i = 0; token[i]; ++i) 654 /* find length of token */; 655 indent_declaration(dec_ind - i, tabs_to_var); 656 ps.dumped_decl_indent = true; 657 } 658 else if (ps.want_blank) 659 *e_code++ = ' '; 660 661 { 662 int len = e_token - s_token; 663 664 CHECK_SIZE_CODE(len); 665 memcpy(e_code, token, len); 666 e_code += len; 667 } 668 ps.want_blank = false; 669 break; 670 671 case binary_op: /* any binary operation */ 672 { 673 int len = e_token - s_token; 674 675 CHECK_SIZE_CODE(len + 1); 676 if (ps.want_blank) 677 *e_code++ = ' '; 678 memcpy(e_code, token, len); 679 e_code += len; 680 } 681 ps.want_blank = true; 682 break; 683 684 case postop: /* got a trailing ++ or -- */ 685 *e_code++ = token[0]; 686 *e_code++ = token[1]; 687 ps.want_blank = true; 688 break; 689 690 case question: /* got a ? */ 691 squest++; /* this will be used when a later colon 692 * appears so we can distinguish the 693 * <c>?<n>:<n> construct */ 694 if (ps.want_blank) 695 *e_code++ = ' '; 696 *e_code++ = '?'; 697 ps.want_blank = true; 698 break; 699 700 case casestmt: /* got word 'case' or 'default' */ 701 scase = true; /* so we can process the later colon properly */ 702 goto copy_id; 703 704 case colon: /* got a ':' */ 705 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 706 --squest; 707 if (ps.want_blank) 708 *e_code++ = ' '; 709 *e_code++ = ':'; 710 ps.want_blank = true; 711 break; 712 } 713 if (ps.in_or_st) { 714 *e_code++ = ':'; 715 ps.want_blank = false; 716 break; 717 } 718 ps.in_stmt = false; /* seeing a label does not imply we are in a 719 * stmt */ 720 /* 721 * turn everything so far into a label 722 */ 723 { 724 int len = e_code - s_code; 725 726 CHECK_SIZE_LAB(len + 3); 727 memcpy(e_lab, s_code, len); 728 e_lab += len; 729 *e_lab++ = ':'; 730 *e_lab = '\0'; 731 e_code = s_code; 732 } 733 force_nl = ps.pcase = scase; /* ps.pcase will be used by 734 * dump_line to decide how to 735 * indent the label. force_nl 736 * will force a case n: to be 737 * on a line by itself */ 738 scase = false; 739 ps.want_blank = false; 740 break; 741 742 case semicolon: /* got a ';' */ 743 if (ps.dec_nest == 0) 744 ps.in_or_st = false;/* we are not in an initialization or 745 * structure declaration */ 746 scase = false; /* these will only need resetting in an error */ 747 squest = 0; 748 if (ps.last_token == rparen) 749 ps.in_parameter_declaration = 0; 750 ps.cast_mask = 0; 751 ps.not_cast_mask = 0; 752 ps.block_init = 0; 753 ps.block_init_level = 0; 754 ps.just_saw_decl--; 755 756 if (ps.in_decl && s_code == e_code && !ps.block_init && 757 !ps.dumped_decl_indent && ps.paren_level == 0) { 758 /* indent stray semicolons in declarations */ 759 indent_declaration(dec_ind - 1, tabs_to_var); 760 ps.dumped_decl_indent = true; 761 } 762 763 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 764 * structure declaration, we 765 * arent any more */ 766 767 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 768 769 /* 770 * This should be true iff there were unbalanced parens in the 771 * stmt. It is a bit complicated, because the semicolon might 772 * be in a for stmt 773 */ 774 diag2(1, "Unbalanced parens"); 775 ps.p_l_follow = 0; 776 if (sp_sw) { /* this is a check for an if, while, etc. with 777 * unbalanced parens */ 778 sp_sw = false; 779 parse(hd_type); /* dont lose the if, or whatever */ 780 } 781 } 782 *e_code++ = ';'; 783 ps.want_blank = true; 784 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 785 * middle of a stmt */ 786 787 if (!sp_sw) { /* if not if for (;;) */ 788 parse(semicolon); /* let parser know about end of stmt */ 789 force_nl = true;/* force newline after an end of stmt */ 790 } 791 break; 792 793 case lbrace: /* got a '{' */ 794 ps.in_stmt = false; /* dont indent the {} */ 795 if (!ps.block_init) 796 force_nl = true;/* force other stuff on same line as '{' onto 797 * new line */ 798 else if (ps.block_init_level <= 0) 799 ps.block_init_level = 1; 800 else 801 ps.block_init_level++; 802 803 if (s_code != e_code && !ps.block_init) { 804 if (!btype_2) { 805 dump_line(); 806 ps.want_blank = false; 807 } 808 else if (ps.in_parameter_declaration && !ps.in_or_st) { 809 ps.i_l_follow = 0; 810 if (function_brace_split) { /* dump the line prior to the 811 * brace ... */ 812 dump_line(); 813 ps.want_blank = false; 814 } else /* add a space between the decl and brace */ 815 ps.want_blank = true; 816 } 817 } 818 if (ps.in_parameter_declaration) 819 prefix_blankline_requested = 0; 820 821 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 822 * parens */ 823 diag2(1, "Unbalanced parens"); 824 ps.p_l_follow = 0; 825 if (sp_sw) { /* check for unclosed if, for, etc. */ 826 sp_sw = false; 827 parse(hd_type); 828 ps.ind_level = ps.i_l_follow; 829 } 830 } 831 if (s_code == e_code) 832 ps.ind_stmt = false; /* dont put extra indentation on line 833 * with '{' */ 834 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 835 * declaration or an init */ 836 di_stack[ps.dec_nest] = dec_ind; 837 if (++ps.dec_nest == nitems(di_stack)) { 838 diag3(0, "Reached internal limit of %d struct levels", 839 nitems(di_stack)); 840 ps.dec_nest--; 841 } 842 /* ? dec_ind = 0; */ 843 } 844 else { 845 ps.decl_on_line = false; /* we can't be in the middle of 846 * a declaration, so don't do 847 * special indentation of 848 * comments */ 849 if (blanklines_after_declarations_at_proctop 850 && ps.in_parameter_declaration) 851 postfix_blankline_requested = 1; 852 ps.in_parameter_declaration = 0; 853 ps.in_decl = false; 854 } 855 dec_ind = 0; 856 parse(lbrace); /* let parser know about this */ 857 if (ps.want_blank) /* put a blank before '{' if '{' is not at 858 * start of line */ 859 *e_code++ = ' '; 860 ps.want_blank = false; 861 *e_code++ = '{'; 862 ps.just_saw_decl = 0; 863 break; 864 865 case rbrace: /* got a '}' */ 866 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 867 * omitted in 868 * declarations */ 869 parse(semicolon); 870 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 871 diag2(1, "Unbalanced parens"); 872 ps.p_l_follow = 0; 873 sp_sw = false; 874 } 875 ps.just_saw_decl = 0; 876 ps.block_init_level--; 877 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 878 * line */ 879 if (verbose) 880 diag2(0, "Line broken"); 881 dump_line(); 882 } 883 *e_code++ = '}'; 884 ps.want_blank = true; 885 ps.in_stmt = ps.ind_stmt = false; 886 if (ps.dec_nest > 0) { /* we are in multi-level structure 887 * declaration */ 888 dec_ind = di_stack[--ps.dec_nest]; 889 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 890 ps.just_saw_decl = 2; 891 ps.in_decl = true; 892 } 893 prefix_blankline_requested = 0; 894 parse(rbrace); /* let parser know about this */ 895 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead 896 && ps.il[ps.tos] >= ps.ind_level; 897 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) 898 postfix_blankline_requested = 1; 899 break; 900 901 case swstmt: /* got keyword "switch" */ 902 sp_sw = true; 903 hd_type = swstmt; /* keep this for when we have seen the 904 * expression */ 905 goto copy_id; /* go move the token into buffer */ 906 907 case sp_paren: /* token is if, while, for */ 908 sp_sw = true; /* the interesting stuff is done after the 909 * expression is scanned */ 910 hd_type = (*token == 'i' ? ifstmt : 911 (*token == 'w' ? whilestmt : forstmt)); 912 913 /* 914 * remember the type of header for later use by parser 915 */ 916 goto copy_id; /* copy the token into line */ 917 918 case sp_nparen: /* got else, do */ 919 ps.in_stmt = false; 920 if (*token == 'e') { 921 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { 922 if (verbose) 923 diag2(0, "Line broken"); 924 dump_line();/* make sure this starts a line */ 925 ps.want_blank = false; 926 } 927 force_nl = true;/* also, following stuff must go onto new line */ 928 last_else = 1; 929 parse(elselit); 930 } 931 else { 932 if (e_code != s_code) { /* make sure this starts a line */ 933 if (verbose) 934 diag2(0, "Line broken"); 935 dump_line(); 936 ps.want_blank = false; 937 } 938 force_nl = true;/* also, following stuff must go onto new line */ 939 last_else = 0; 940 parse(dolit); 941 } 942 goto copy_id; /* move the token into line */ 943 944 case type_def: 945 case storage: 946 prefix_blankline_requested = 0; 947 goto copy_id; 948 949 case structure: 950 if (ps.p_l_follow > 0) 951 goto copy_id; 952 case decl: /* we have a declaration type (int, etc.) */ 953 parse(decl); /* let parser worry about indentation */ 954 if (ps.last_token == rparen && ps.tos <= 1) { 955 if (s_code != e_code) { 956 dump_line(); 957 ps.want_blank = 0; 958 } 959 } 960 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { 961 ps.ind_level = ps.i_l_follow = 1; 962 ps.ind_stmt = 0; 963 } 964 ps.in_or_st = true; /* this might be a structure or initialization 965 * declaration */ 966 ps.in_decl = ps.decl_on_line = ps.last_token != type_def; 967 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 968 ps.just_saw_decl = 2; 969 prefix_blankline_requested = 0; 970 for (i = 0; token[i++];); /* get length of token */ 971 972 if (ps.ind_level == 0 || ps.dec_nest > 0) { 973 /* global variable or struct member in local variable */ 974 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; 975 tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); 976 } else { 977 /* local variable */ 978 dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; 979 tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); 980 } 981 goto copy_id; 982 983 case funcname: 984 case ident: /* got an identifier or constant */ 985 if (ps.in_decl) { 986 if (type_code == funcname) { 987 ps.in_decl = false; 988 if (procnames_start_line && s_code != e_code) { 989 *e_code = '\0'; 990 dump_line(); 991 } 992 else if (ps.want_blank) { 993 *e_code++ = ' '; 994 } 995 ps.want_blank = false; 996 } 997 else if (!ps.block_init && !ps.dumped_decl_indent && 998 ps.paren_level == 0) { /* if we are in a declaration, we 999 * must indent identifier */ 1000 indent_declaration(dec_ind, tabs_to_var); 1001 ps.dumped_decl_indent = true; 1002 ps.want_blank = false; 1003 } 1004 } 1005 else if (sp_sw && ps.p_l_follow == 0) { 1006 sp_sw = false; 1007 force_nl = true; 1008 ps.last_u_d = true; 1009 ps.in_stmt = false; 1010 parse(hd_type); 1011 } 1012 copy_id: 1013 { 1014 int len = e_token - s_token; 1015 1016 CHECK_SIZE_CODE(len + 1); 1017 if (ps.want_blank) 1018 *e_code++ = ' '; 1019 memcpy(e_code, s_token, len); 1020 e_code += len; 1021 } 1022 if (type_code != funcname) 1023 ps.want_blank = true; 1024 break; 1025 1026 case strpfx: 1027 { 1028 int len = e_token - s_token; 1029 1030 CHECK_SIZE_CODE(len + 1); 1031 if (ps.want_blank) 1032 *e_code++ = ' '; 1033 memcpy(e_code, token, len); 1034 e_code += len; 1035 } 1036 ps.want_blank = false; 1037 break; 1038 1039 case period: /* treat a period kind of like a binary 1040 * operation */ 1041 *e_code++ = '.'; /* move the period into line */ 1042 ps.want_blank = false; /* dont put a blank after a period */ 1043 break; 1044 1045 case comma: 1046 ps.want_blank = (s_code != e_code); /* only put blank after comma 1047 * if comma does not start the 1048 * line */ 1049 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init && 1050 !ps.dumped_decl_indent && ps.paren_level == 0) { 1051 /* indent leading commas and not the actual identifiers */ 1052 indent_declaration(dec_ind - 1, tabs_to_var); 1053 ps.dumped_decl_indent = true; 1054 } 1055 *e_code++ = ','; 1056 if (ps.p_l_follow == 0) { 1057 if (ps.block_init_level <= 0) 1058 ps.block_init = 0; 1059 if (break_comma && (!ps.leave_comma || 1060 count_spaces_until(compute_code_target(), s_code, e_code) > 1061 max_col - tabsize)) 1062 force_nl = true; 1063 } 1064 break; 1065 1066 case preesc: /* got the character '#' */ 1067 if ((s_com != e_com) || 1068 (s_lab != e_lab) || 1069 (s_code != e_code)) 1070 dump_line(); 1071 CHECK_SIZE_LAB(1); 1072 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1073 { 1074 int in_comment = 0; 1075 int com_start = 0; 1076 char quote = 0; 1077 int com_end = 0; 1078 1079 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1080 buf_ptr++; 1081 if (buf_ptr >= buf_end) 1082 fill_buffer(); 1083 } 1084 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1085 CHECK_SIZE_LAB(2); 1086 *e_lab = *buf_ptr++; 1087 if (buf_ptr >= buf_end) 1088 fill_buffer(); 1089 switch (*e_lab++) { 1090 case BACKSLASH: 1091 if (!in_comment) { 1092 *e_lab++ = *buf_ptr++; 1093 if (buf_ptr >= buf_end) 1094 fill_buffer(); 1095 } 1096 break; 1097 case '/': 1098 if (*buf_ptr == '*' && !in_comment && !quote) { 1099 in_comment = 1; 1100 *e_lab++ = *buf_ptr++; 1101 com_start = e_lab - s_lab - 2; 1102 } 1103 break; 1104 case '"': 1105 if (quote == '"') 1106 quote = 0; 1107 break; 1108 case '\'': 1109 if (quote == '\'') 1110 quote = 0; 1111 break; 1112 case '*': 1113 if (*buf_ptr == '/' && in_comment) { 1114 in_comment = 0; 1115 *e_lab++ = *buf_ptr++; 1116 com_end = e_lab - s_lab; 1117 } 1118 break; 1119 } 1120 } 1121 1122 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1123 e_lab--; 1124 if (e_lab - s_lab == com_end && bp_save == NULL) { 1125 /* comment on preprocessor line */ 1126 if (sc_end == NULL) { /* if this is the first comment, 1127 * we must set up the buffer */ 1128 save_com = sc_buf; 1129 sc_end = &save_com[0]; 1130 } 1131 else { 1132 *sc_end++ = '\n'; /* add newline between 1133 * comments */ 1134 *sc_end++ = ' '; 1135 --line_no; 1136 } 1137 if (sc_end - save_com + com_end - com_start > sc_size) 1138 errx(1, "input too long"); 1139 bcopy(s_lab + com_start, sc_end, com_end - com_start); 1140 sc_end += com_end - com_start; 1141 e_lab = s_lab + com_start; 1142 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1143 e_lab--; 1144 bp_save = buf_ptr; /* save current input buffer */ 1145 be_save = buf_end; 1146 buf_ptr = save_com; /* fix so that subsequent calls to 1147 * lexi will take tokens out of 1148 * save_com */ 1149 *sc_end++ = ' '; /* add trailing blank, just in case */ 1150 buf_end = sc_end; 1151 sc_end = NULL; 1152 } 1153 CHECK_SIZE_LAB(1); 1154 *e_lab = '\0'; /* null terminate line */ 1155 ps.pcase = false; 1156 } 1157 1158 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ 1159 if ((size_t)ifdef_level < nitems(state_stack)) { 1160 match_state[ifdef_level].tos = -1; 1161 state_stack[ifdef_level++] = ps; 1162 } 1163 else 1164 diag2(1, "#if stack overflow"); 1165 } 1166 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ 1167 if (ifdef_level <= 0) 1168 diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); 1169 else { 1170 match_state[ifdef_level - 1] = ps; 1171 ps = state_stack[ifdef_level - 1]; 1172 } 1173 } 1174 else if (strncmp(s_lab, "#endif", 6) == 0) { 1175 if (ifdef_level <= 0) 1176 diag2(1, "Unmatched #endif"); 1177 else 1178 ifdef_level--; 1179 } else { 1180 struct directives { 1181 int size; 1182 const char *string; 1183 } 1184 recognized[] = { 1185 {7, "include"}, 1186 {6, "define"}, 1187 {5, "undef"}, 1188 {4, "line"}, 1189 {5, "error"}, 1190 {6, "pragma"} 1191 }; 1192 int d = nitems(recognized); 1193 while (--d >= 0) 1194 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) 1195 break; 1196 if (d < 0) { 1197 diag2(1, "Unrecognized cpp directive"); 1198 break; 1199 } 1200 } 1201 if (blanklines_around_conditional_compilation) { 1202 postfix_blankline_requested++; 1203 n_real_blanklines = 0; 1204 } 1205 else { 1206 postfix_blankline_requested = 0; 1207 prefix_blankline_requested = 0; 1208 } 1209 break; /* subsequent processing of the newline 1210 * character will cause the line to be printed */ 1211 1212 case comment: /* we have gotten a / followed by * this is a biggie */ 1213 pr_comment(); 1214 break; 1215 } /* end of big switch stmt */ 1216 1217 *e_code = '\0'; /* make sure code section is null terminated */ 1218 if (type_code != comment && type_code != newline && type_code != preesc) 1219 ps.last_token = type_code; 1220 } /* end of main while (1) loop */ 1221 } 1222 1223 /* 1224 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1225 * backup file will be ".Bfile" then make the backup file the input and 1226 * original input file the output 1227 */ 1228 static void 1229 bakcopy(void) 1230 { 1231 int n, 1232 bakchn; 1233 char buff[8 * 1024]; 1234 const char *p; 1235 1236 /* construct file name .Bfile */ 1237 for (p = in_name; *p; p++); /* skip to end of string */ 1238 while (p > in_name && *p != '/') /* find last '/' */ 1239 p--; 1240 if (*p == '/') 1241 p++; 1242 sprintf(bakfile, "%s%s", p, simple_backup_suffix); 1243 1244 /* copy in_name to backup file */ 1245 bakchn = creat(bakfile, 0600); 1246 if (bakchn < 0) 1247 err(1, "%s", bakfile); 1248 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 1249 if (write(bakchn, buff, n) != n) 1250 err(1, "%s", bakfile); 1251 if (n < 0) 1252 err(1, "%s", in_name); 1253 close(bakchn); 1254 fclose(input); 1255 1256 /* re-open backup file as the input file */ 1257 input = fopen(bakfile, "r"); 1258 if (input == NULL) 1259 err(1, "%s", bakfile); 1260 /* now the original input file will be the output */ 1261 output = fopen(in_name, "w"); 1262 if (output == NULL) { 1263 unlink(bakfile); 1264 err(1, "%s", in_name); 1265 } 1266 } 1267 1268 static void 1269 indent_declaration(int cur_dec_ind, int tabs_to_var) 1270 { 1271 int pos = e_code - s_code; 1272 char *startpos = e_code; 1273 1274 /* 1275 * get the tab math right for indentations that are not multiples of tabsize 1276 */ 1277 if ((ps.ind_level * ps.ind_size) % tabsize != 0) { 1278 pos += (ps.ind_level * ps.ind_size) % tabsize; 1279 cur_dec_ind += (ps.ind_level * ps.ind_size) % tabsize; 1280 } 1281 if (tabs_to_var) { 1282 int tpos; 1283 1284 CHECK_SIZE_CODE(cur_dec_ind / tabsize); 1285 while ((tpos = tabsize * (1 + pos / tabsize)) <= cur_dec_ind) { 1286 *e_code++ = '\t'; 1287 pos = tpos; 1288 } 1289 } 1290 CHECK_SIZE_CODE(cur_dec_ind - pos + 1); 1291 while (pos < cur_dec_ind) { 1292 *e_code++ = ' '; 1293 pos++; 1294 } 1295 if (e_code == startpos && ps.want_blank) { 1296 *e_code++ = ' '; 1297 ps.want_blank = false; 1298 } 1299 } 1300