1 /*- 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #ifndef lint 37 static const char copyright[] = 38 "@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\ 39 @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\ 40 @(#) Copyright (c) 1980, 1993\n\ 41 The Regents of the University of California. All rights reserved.\n"; 42 #endif /* not lint */ 43 44 #if 0 45 #ifndef lint 46 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 47 #endif /* not lint */ 48 #endif 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/capsicum.h> 55 #include <err.h> 56 #include <errno.h> 57 #include <fcntl.h> 58 #include <unistd.h> 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <string.h> 62 #include <ctype.h> 63 #include "indent_globs.h" 64 #include "indent_codes.h" 65 #include "indent.h" 66 67 static void bakcopy(void); 68 static void indent_declaration(int, int); 69 70 const char *in_name = "Standard Input"; /* will always point to name of input 71 * file */ 72 const char *out_name = "Standard Output"; /* will always point to name 73 * of output file */ 74 char bakfile[MAXPATHLEN] = ""; 75 76 int 77 main(int argc, char **argv) 78 { 79 cap_rights_t rights; 80 81 int dec_ind; /* current indentation for declarations */ 82 int di_stack[20]; /* a stack of structure indentation levels */ 83 int flushed_nl; /* used when buffering up comments to remember 84 * that a newline was passed over */ 85 int force_nl; /* when true, code must be broken */ 86 int hd_type = 0; /* used to store type of stmt for if (...), 87 * for (...), etc */ 88 int i; /* local loop counter */ 89 int scase; /* set to true when we see a case, so we will 90 * know what to do with the following colon */ 91 int sp_sw; /* when true, we are in the expression of 92 * if(...), while(...), etc. */ 93 int squest; /* when this is positive, we have seen a ? 94 * without the matching : in a <c>?<s>:<s> 95 * construct */ 96 const char *t_ptr; /* used for copying tokens */ 97 int tabs_to_var; /* true if using tabs to indent to var name */ 98 int type_code; /* the type of token, returned by lexi */ 99 100 int last_else = 0; /* true iff last keyword was an else */ 101 const char *profile_name = NULL; 102 103 104 /*-----------------------------------------------*\ 105 | INITIALIZATION | 106 \*-----------------------------------------------*/ 107 108 found_err = 0; 109 110 ps.p_stack[0] = stmt; /* this is the parser's stack */ 111 ps.last_nl = true; /* this is true if the last thing scanned was 112 * a newline */ 113 ps.last_token = semicolon; 114 combuf = (char *) malloc(bufsize); 115 if (combuf == NULL) 116 err(1, NULL); 117 labbuf = (char *) malloc(bufsize); 118 if (labbuf == NULL) 119 err(1, NULL); 120 codebuf = (char *) malloc(bufsize); 121 if (codebuf == NULL) 122 err(1, NULL); 123 tokenbuf = (char *) malloc(bufsize); 124 if (tokenbuf == NULL) 125 err(1, NULL); 126 alloc_typenames(); 127 l_com = combuf + bufsize - 5; 128 l_lab = labbuf + bufsize - 5; 129 l_code = codebuf + bufsize - 5; 130 l_token = tokenbuf + bufsize - 5; 131 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 132 * comment buffers */ 133 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 134 ps.else_if = 1; /* Default else-if special processing to on */ 135 s_lab = e_lab = labbuf + 1; 136 s_code = e_code = codebuf + 1; 137 s_com = e_com = combuf + 1; 138 s_token = e_token = tokenbuf + 1; 139 140 in_buffer = (char *) malloc(10); 141 if (in_buffer == NULL) 142 err(1, NULL); 143 in_buffer_limit = in_buffer + 8; 144 buf_ptr = buf_end = in_buffer; 145 line_no = 1; 146 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 147 sp_sw = force_nl = false; 148 ps.in_or_st = false; 149 ps.bl_line = true; 150 dec_ind = 0; 151 di_stack[ps.dec_nest = 0] = 0; 152 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 153 154 scase = ps.pcase = false; 155 squest = 0; 156 sc_end = NULL; 157 bp_save = NULL; 158 be_save = NULL; 159 160 output = NULL; 161 tabs_to_var = 0; 162 163 /*--------------------------------------------------*\ 164 | COMMAND LINE SCAN | 165 \*--------------------------------------------------*/ 166 167 #ifdef undef 168 max_col = 78; /* -l78 */ 169 lineup_to_parens = 1; /* -lp */ 170 ps.ljust_decl = 0; /* -ndj */ 171 ps.com_ind = 33; /* -c33 */ 172 star_comment_cont = 1; /* -sc */ 173 ps.ind_size = 8; /* -i8 */ 174 verbose = 0; 175 ps.decl_indent = 16; /* -di16 */ 176 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value 177 * by an arg, we will set this equal to 178 * ps.decl_ind */ 179 ps.indent_parameters = 1; /* -ip */ 180 ps.decl_com_ind = 0; /* if this is not set to some positive value 181 * by an arg, we will set this equal to 182 * ps.com_ind */ 183 btype_2 = 1; /* -br */ 184 cuddle_else = 1; /* -ce */ 185 ps.unindent_displace = 0; /* -d0 */ 186 ps.case_indent = 0; /* -cli0 */ 187 format_block_comments = 1; /* -fcb */ 188 format_col1_comments = 1; /* -fc1 */ 189 procnames_start_line = 1; /* -psl */ 190 proc_calls_space = 0; /* -npcs */ 191 comment_delimiter_on_blankline = 1; /* -cdb */ 192 ps.leave_comma = 1; /* -nbc */ 193 #endif 194 195 for (i = 1; i < argc; ++i) 196 if (strcmp(argv[i], "-npro") == 0) 197 break; 198 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') 199 profile_name = argv[i]; /* non-empty -P (set profile) */ 200 set_defaults(); 201 if (i >= argc) 202 set_profile(profile_name); 203 204 for (i = 1; i < argc; ++i) { 205 206 /* 207 * look thru args (if any) for changes to defaults 208 */ 209 if (argv[i][0] != '-') {/* no flag on parameter */ 210 if (input == NULL) { /* we must have the input file */ 211 in_name = argv[i]; /* remember name of input file */ 212 input = fopen(in_name, "r"); 213 if (input == NULL) /* check for open error */ 214 err(1, "%s", in_name); 215 continue; 216 } 217 else if (output == NULL) { /* we have the output file */ 218 out_name = argv[i]; /* remember name of output file */ 219 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 220 * the file */ 221 errx(1, "input and output files must be different"); 222 } 223 output = fopen(out_name, "w"); 224 if (output == NULL) /* check for create error */ 225 err(1, "%s", out_name); 226 continue; 227 } 228 errx(1, "unknown parameter: %s", argv[i]); 229 } 230 else 231 set_option(argv[i]); 232 } /* end of for */ 233 if (input == NULL) 234 input = stdin; 235 if (output == NULL) { 236 if (troff || input == stdin) 237 output = stdout; 238 else { 239 out_name = in_name; 240 bakcopy(); 241 } 242 } 243 244 /* Restrict input/output descriptors and enter Capsicum sandbox. */ 245 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); 246 if (cap_rights_limit(fileno(output), &rights) < 0 && errno != ENOSYS) 247 err(EXIT_FAILURE, "unable to limit rights for %s", out_name); 248 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 249 if (cap_rights_limit(fileno(input), &rights) < 0 && errno != ENOSYS) 250 err(EXIT_FAILURE, "unable to limit rights for %s", in_name); 251 if (cap_enter() < 0 && errno != ENOSYS) 252 err(EXIT_FAILURE, "unable to enter capability mode"); 253 254 if (ps.com_ind <= 1) 255 ps.com_ind = 2; /* dont put normal comments before column 2 */ 256 if (troff) { 257 if (bodyf.font[0] == 0) 258 parsefont(&bodyf, "R"); 259 if (scomf.font[0] == 0) 260 parsefont(&scomf, "I"); 261 if (blkcomf.font[0] == 0) 262 blkcomf = scomf, blkcomf.size += 2; 263 if (boxcomf.font[0] == 0) 264 boxcomf = blkcomf; 265 if (stringf.font[0] == 0) 266 parsefont(&stringf, "L"); 267 if (keywordf.font[0] == 0) 268 parsefont(&keywordf, "B"); 269 writefdef(&bodyf, 'B'); 270 writefdef(&scomf, 'C'); 271 writefdef(&blkcomf, 'L'); 272 writefdef(&boxcomf, 'X'); 273 writefdef(&stringf, 'S'); 274 writefdef(&keywordf, 'K'); 275 } 276 if (block_comment_max_col <= 0) 277 block_comment_max_col = max_col; 278 if (ps.local_decl_indent < 0) /* if not specified by user, set this */ 279 ps.local_decl_indent = ps.decl_indent; 280 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ 281 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; 282 if (continuation_indent == 0) 283 continuation_indent = ps.ind_size; 284 fill_buffer(); /* get first batch of stuff into input buffer */ 285 286 parse(semicolon); 287 { 288 char *p = buf_ptr; 289 int col = 1; 290 291 while (1) { 292 if (*p == ' ') 293 col++; 294 else if (*p == '\t') 295 col = ((col - 1) & ~7) + 9; 296 else 297 break; 298 p++; 299 } 300 if (col > ps.ind_size) 301 ps.ind_level = ps.i_l_follow = col / ps.ind_size; 302 } 303 if (troff) { 304 const char *p = in_name, 305 *beg = in_name; 306 307 while (*p) 308 if (*p++ == '/') 309 beg = p; 310 fprintf(output, ".Fn \"%s\"\n", beg); 311 } 312 /* 313 * START OF MAIN LOOP 314 */ 315 316 while (1) { /* this is the main loop. it will go until we 317 * reach eof */ 318 int is_procname; 319 320 type_code = lexi(); /* lexi reads one token. The actual 321 * characters read are stored in "token". lexi 322 * returns a code indicating the type of token */ 323 is_procname = ps.procname[0]; 324 325 /* 326 * The following code moves everything following an if (), while (), 327 * else, etc. up to the start of the following stmt to a buffer. This 328 * allows proper handling of both kinds of brace placement. 329 */ 330 331 flushed_nl = false; 332 while (ps.search_brace) { /* if we scanned an if(), while(), 333 * etc., we might need to copy stuff 334 * into a buffer we must loop, copying 335 * stuff into save_com, until we find 336 * the start of the stmt which follows 337 * the if, or whatever */ 338 switch (type_code) { 339 case newline: 340 ++line_no; 341 if (sc_end != NULL) { /* dump comment, if any */ 342 *sc_end++ = '\n'; /* newlines are needed in this case */ 343 goto sw_buffer; 344 } 345 flushed_nl = true; 346 case form_feed: 347 break; /* form feeds and newlines found here will be 348 * ignored */ 349 350 case lbrace: /* this is a brace that starts the compound 351 * stmt */ 352 if (sc_end == NULL) { /* ignore buffering if a comment wasn't 353 * stored up */ 354 ps.search_brace = false; 355 goto check_type; 356 } 357 if (btype_2) { 358 save_com[0] = '{'; /* we either want to put the brace 359 * right after the if */ 360 goto sw_buffer; /* go to common code to get out of 361 * this loop */ 362 } 363 case comment: /* we have a comment, so we must copy it into 364 * the buffer */ 365 if (!flushed_nl || sc_end != NULL) { 366 if (sc_end == NULL) { /* if this is the first comment, we 367 * must set up the buffer */ 368 save_com[0] = save_com[1] = ' '; 369 sc_end = &(save_com[2]); 370 } 371 else { 372 *sc_end++ = '\n'; /* add newline between 373 * comments */ 374 *sc_end++ = ' '; 375 --line_no; 376 } 377 *sc_end++ = '/'; /* copy in start of comment */ 378 *sc_end++ = '*'; 379 380 for (;;) { /* loop until we get to the end of the comment */ 381 *sc_end = *buf_ptr++; 382 if (buf_ptr >= buf_end) 383 fill_buffer(); 384 385 if (*sc_end++ == '*' && *buf_ptr == '/') 386 break; /* we are at end of comment */ 387 388 if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer 389 * overflow */ 390 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); 391 fflush(output); 392 exit(1); 393 } 394 } 395 *sc_end++ = '/'; /* add ending slash */ 396 if (++buf_ptr >= buf_end) /* get past / in buffer */ 397 fill_buffer(); 398 break; 399 } 400 default: /* it is the start of a normal statement */ 401 if (flushed_nl) /* if we flushed a newline, make sure it is 402 * put back */ 403 force_nl = true; 404 if ((type_code == sp_paren && *token == 'i' 405 && last_else && ps.else_if) 406 || (type_code == sp_nparen && *token == 'e' 407 && e_code != s_code && e_code[-1] == '}')) 408 force_nl = false; 409 410 if (sc_end == NULL) { /* ignore buffering if comment wasn't 411 * saved up */ 412 ps.search_brace = false; 413 goto check_type; 414 } 415 if (force_nl) { /* if we should insert a nl here, put it into 416 * the buffer */ 417 force_nl = false; 418 --line_no; /* this will be re-increased when the nl is 419 * read from the buffer */ 420 *sc_end++ = '\n'; 421 *sc_end++ = ' '; 422 if (verbose && !flushed_nl) /* print error msg if the line 423 * was not already broken */ 424 diag2(0, "Line broken"); 425 flushed_nl = false; 426 } 427 for (t_ptr = token; *t_ptr; ++t_ptr) 428 *sc_end++ = *t_ptr; /* copy token into temp buffer */ 429 ps.procname[0] = 0; 430 431 sw_buffer: 432 ps.search_brace = false; /* stop looking for start of 433 * stmt */ 434 bp_save = buf_ptr; /* save current input buffer */ 435 be_save = buf_end; 436 buf_ptr = save_com; /* fix so that subsequent calls to 437 * lexi will take tokens out of 438 * save_com */ 439 *sc_end++ = ' ';/* add trailing blank, just in case */ 440 buf_end = sc_end; 441 sc_end = NULL; 442 break; 443 } /* end of switch */ 444 if (type_code != 0) /* we must make this check, just in case there 445 * was an unexpected EOF */ 446 type_code = lexi(); /* read another token */ 447 /* if (ps.search_brace) ps.procname[0] = 0; */ 448 if ((is_procname = ps.procname[0]) && flushed_nl 449 && !procnames_start_line && ps.in_decl 450 && type_code == ident) 451 flushed_nl = 0; 452 } /* end of while (search_brace) */ 453 last_else = 0; 454 check_type: 455 if (type_code == 0) { /* we got eof */ 456 if (s_lab != e_lab || s_code != e_code 457 || s_com != e_com) /* must dump end of line */ 458 dump_line(); 459 if (ps.tos > 1) /* check for balanced braces */ 460 diag2(1, "Stuff missing from end of file"); 461 462 if (verbose) { 463 printf("There were %d output lines and %d comments\n", 464 ps.out_lines, ps.out_coms); 465 printf("(Lines with comments)/(Lines with code): %6.3f\n", 466 (1.0 * ps.com_lines) / code_lines); 467 } 468 fflush(output); 469 exit(found_err); 470 } 471 if ( 472 (type_code != comment) && 473 (type_code != newline) && 474 (type_code != preesc) && 475 (type_code != form_feed)) { 476 if (force_nl && 477 (type_code != semicolon) && 478 (type_code != lbrace || !btype_2)) { 479 /* we should force a broken line here */ 480 if (verbose && !flushed_nl) 481 diag2(0, "Line broken"); 482 flushed_nl = false; 483 dump_line(); 484 ps.want_blank = false; /* dont insert blank at line start */ 485 force_nl = false; 486 } 487 ps.in_stmt = true; /* turn on flag which causes an extra level of 488 * indentation. this is turned off by a ; or 489 * '}' */ 490 if (s_com != e_com) { /* the turkey has embedded a comment 491 * in a line. fix it */ 492 *e_code++ = ' '; 493 for (t_ptr = s_com; *t_ptr; ++t_ptr) { 494 CHECK_SIZE_CODE; 495 *e_code++ = *t_ptr; 496 } 497 *e_code++ = ' '; 498 *e_code = '\0'; /* null terminate code sect */ 499 ps.want_blank = false; 500 e_com = s_com; 501 } 502 } 503 else if (type_code != comment) /* preserve force_nl thru a comment */ 504 force_nl = false; /* cancel forced newline after newline, form 505 * feed, etc */ 506 507 508 509 /*-----------------------------------------------------*\ 510 | do switch on type of token scanned | 511 \*-----------------------------------------------------*/ 512 CHECK_SIZE_CODE; 513 switch (type_code) { /* now, decide what to do with the token */ 514 515 case form_feed: /* found a form feed in line */ 516 ps.use_ff = true; /* a form feed is treated much like a newline */ 517 dump_line(); 518 ps.want_blank = false; 519 break; 520 521 case newline: 522 if (ps.last_token != comma || ps.p_l_follow > 0 523 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 524 dump_line(); 525 ps.want_blank = false; 526 } 527 ++line_no; /* keep track of input line number */ 528 break; 529 530 case lparen: /* got a '(' or '[' */ 531 /* count parens to make Healy happy */ 532 if (++ps.p_l_follow == nitems(ps.paren_indents)) { 533 diag3(0, "Reached internal limit of %d unclosed parens", 534 nitems(ps.paren_indents)); 535 ps.p_l_follow--; 536 } 537 if (ps.want_blank && *token != '[' && 538 ((ps.last_token != ident && ps.last_token != funcname) || 539 proc_calls_space || 540 /* offsetof (1) is never allowed a space; sizeof (2) gets 541 * one iff -bs; all other keywords (>2) always get a space 542 * before lparen */ 543 ps.keyword + Bill_Shannon > 2)) 544 *e_code++ = ' '; 545 ps.want_blank = false; 546 if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && 547 !is_procname) { 548 /* function pointer declarations */ 549 if (troff) { 550 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 551 e_code += strlen(e_code); 552 } 553 else { 554 indent_declaration(dec_ind, tabs_to_var); 555 } 556 ps.dumped_decl_indent = true; 557 } 558 if (!troff) 559 *e_code++ = token[0]; 560 ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; 561 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent 562 && ps.paren_indents[0] < 2 * ps.ind_size) 563 ps.paren_indents[0] = 2 * ps.ind_size; 564 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 565 /* 566 * this is a kluge to make sure that declarations will be 567 * aligned right if proc decl has an explicit type on it, i.e. 568 * "int a(x) {..." 569 */ 570 parse(semicolon); /* I said this was a kluge... */ 571 ps.in_or_st = false; /* turn off flag for structure decl or 572 * initialization */ 573 } 574 /* parenthesized type following sizeof or offsetof is not a cast */ 575 if (ps.keyword == 1 || ps.keyword == 2) 576 ps.not_cast_mask |= 1 << ps.p_l_follow; 577 break; 578 579 case rparen: /* got a ')' or ']' */ 580 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { 581 ps.last_u_d = true; 582 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 583 ps.want_blank = space_after_cast; 584 } else 585 ps.want_blank = true; 586 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; 587 if (--ps.p_l_follow < 0) { 588 ps.p_l_follow = 0; 589 diag3(0, "Extra %c", *token); 590 } 591 if (e_code == s_code) /* if the paren starts the line */ 592 ps.paren_level = ps.p_l_follow; /* then indent it */ 593 594 *e_code++ = token[0]; 595 596 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 597 * (...), or some such */ 598 sp_sw = false; 599 force_nl = true;/* must force newline after if */ 600 ps.last_u_d = true; /* inform lexi that a following 601 * operator is unary */ 602 ps.in_stmt = false; /* dont use stmt continuation 603 * indentation */ 604 605 parse(hd_type); /* let parser worry about if, or whatever */ 606 } 607 ps.search_brace = btype_2; /* this should insure that constructs 608 * such as main(){...} and int[]{...} 609 * have their braces put in the right 610 * place */ 611 break; 612 613 case unary_op: /* this could be any unary operation */ 614 if (!ps.dumped_decl_indent && ps.in_decl && !is_procname && 615 !ps.block_init) { 616 /* pointer declarations */ 617 if (troff) { 618 if (ps.want_blank) 619 *e_code++ = ' '; 620 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, 621 token); 622 e_code += strlen(e_code); 623 } 624 else { 625 /* if this is a unary op in a declaration, we should 626 * indent this token */ 627 for (i = 0; token[i]; ++i) 628 /* find length of token */; 629 indent_declaration(dec_ind - i, tabs_to_var); 630 } 631 ps.dumped_decl_indent = true; 632 } 633 else if (ps.want_blank) 634 *e_code++ = ' '; 635 { 636 const char *res = token; 637 638 if (troff && token[0] == '-' && token[1] == '>') 639 res = "\\(->"; 640 for (t_ptr = res; *t_ptr; ++t_ptr) { 641 CHECK_SIZE_CODE; 642 *e_code++ = *t_ptr; 643 } 644 } 645 ps.want_blank = false; 646 break; 647 648 case binary_op: /* any binary operation */ 649 if (ps.want_blank) 650 *e_code++ = ' '; 651 { 652 const char *res = token; 653 654 if (troff) 655 switch (token[0]) { 656 case '<': 657 if (token[1] == '=') 658 res = "\\(<="; 659 break; 660 case '>': 661 if (token[1] == '=') 662 res = "\\(>="; 663 break; 664 case '!': 665 if (token[1] == '=') 666 res = "\\(!="; 667 break; 668 case '|': 669 if (token[1] == '|') 670 res = "\\(br\\(br"; 671 else if (token[1] == 0) 672 res = "\\(br"; 673 break; 674 } 675 for (t_ptr = res; *t_ptr; ++t_ptr) { 676 CHECK_SIZE_CODE; 677 *e_code++ = *t_ptr; /* move the operator */ 678 } 679 } 680 ps.want_blank = true; 681 break; 682 683 case postop: /* got a trailing ++ or -- */ 684 *e_code++ = token[0]; 685 *e_code++ = token[1]; 686 ps.want_blank = true; 687 break; 688 689 case question: /* got a ? */ 690 squest++; /* this will be used when a later colon 691 * appears so we can distinguish the 692 * <c>?<n>:<n> construct */ 693 if (ps.want_blank) 694 *e_code++ = ' '; 695 *e_code++ = '?'; 696 ps.want_blank = true; 697 break; 698 699 case casestmt: /* got word 'case' or 'default' */ 700 scase = true; /* so we can process the later colon properly */ 701 goto copy_id; 702 703 case colon: /* got a ':' */ 704 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 705 --squest; 706 if (ps.want_blank) 707 *e_code++ = ' '; 708 *e_code++ = ':'; 709 ps.want_blank = true; 710 break; 711 } 712 if (ps.in_or_st) { 713 *e_code++ = ':'; 714 ps.want_blank = false; 715 break; 716 } 717 ps.in_stmt = false; /* seeing a label does not imply we are in a 718 * stmt */ 719 for (t_ptr = s_code; *t_ptr; ++t_ptr) 720 *e_lab++ = *t_ptr; /* turn everything so far into a label */ 721 e_code = s_code; 722 *e_lab++ = ':'; 723 *e_lab++ = ' '; 724 *e_lab = '\0'; 725 726 force_nl = ps.pcase = scase; /* ps.pcase will be used by 727 * dump_line to decide how to 728 * indent the label. force_nl 729 * will force a case n: to be 730 * on a line by itself */ 731 scase = false; 732 ps.want_blank = false; 733 break; 734 735 case semicolon: /* got a ';' */ 736 if (ps.dec_nest == 0) 737 ps.in_or_st = false;/* we are not in an initialization or 738 * structure declaration */ 739 scase = false; /* these will only need resetting in an error */ 740 squest = 0; 741 if (ps.last_token == rparen) 742 ps.in_parameter_declaration = 0; 743 ps.cast_mask = 0; 744 ps.not_cast_mask = 0; 745 ps.block_init = 0; 746 ps.block_init_level = 0; 747 ps.just_saw_decl--; 748 749 if (ps.in_decl && s_code == e_code && !ps.block_init && 750 !ps.dumped_decl_indent) { 751 /* indent stray semicolons in declarations */ 752 indent_declaration(dec_ind - 1, tabs_to_var); 753 ps.dumped_decl_indent = true; 754 } 755 756 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 757 * structure declaration, we 758 * arent any more */ 759 760 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 761 762 /* 763 * This should be true iff there were unbalanced parens in the 764 * stmt. It is a bit complicated, because the semicolon might 765 * be in a for stmt 766 */ 767 diag2(1, "Unbalanced parens"); 768 ps.p_l_follow = 0; 769 if (sp_sw) { /* this is a check for an if, while, etc. with 770 * unbalanced parens */ 771 sp_sw = false; 772 parse(hd_type); /* dont lose the if, or whatever */ 773 } 774 } 775 *e_code++ = ';'; 776 ps.want_blank = true; 777 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 778 * middle of a stmt */ 779 780 if (!sp_sw) { /* if not if for (;;) */ 781 parse(semicolon); /* let parser know about end of stmt */ 782 force_nl = true;/* force newline after an end of stmt */ 783 } 784 break; 785 786 case lbrace: /* got a '{' */ 787 ps.in_stmt = false; /* dont indent the {} */ 788 if (!ps.block_init) 789 force_nl = true;/* force other stuff on same line as '{' onto 790 * new line */ 791 else if (ps.block_init_level <= 0) 792 ps.block_init_level = 1; 793 else 794 ps.block_init_level++; 795 796 if (s_code != e_code && !ps.block_init) { 797 if (!btype_2) { 798 dump_line(); 799 ps.want_blank = false; 800 } 801 else if (ps.in_parameter_declaration && !ps.in_or_st) { 802 ps.i_l_follow = 0; 803 if (function_brace_split) { /* dump the line prior to the 804 * brace ... */ 805 dump_line(); 806 ps.want_blank = false; 807 } else /* add a space between the decl and brace */ 808 ps.want_blank = true; 809 } 810 } 811 if (ps.in_parameter_declaration) 812 prefix_blankline_requested = 0; 813 814 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 815 * parens */ 816 diag2(1, "Unbalanced parens"); 817 ps.p_l_follow = 0; 818 if (sp_sw) { /* check for unclosed if, for, etc. */ 819 sp_sw = false; 820 parse(hd_type); 821 ps.ind_level = ps.i_l_follow; 822 } 823 } 824 if (s_code == e_code) 825 ps.ind_stmt = false; /* dont put extra indentation on line 826 * with '{' */ 827 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 828 * declaration or an init */ 829 di_stack[ps.dec_nest++] = dec_ind; 830 /* ? dec_ind = 0; */ 831 } 832 else { 833 ps.decl_on_line = false; /* we can't be in the middle of 834 * a declaration, so don't do 835 * special indentation of 836 * comments */ 837 if (blanklines_after_declarations_at_proctop 838 && ps.in_parameter_declaration) 839 postfix_blankline_requested = 1; 840 ps.in_parameter_declaration = 0; 841 ps.in_decl = false; 842 } 843 dec_ind = 0; 844 parse(lbrace); /* let parser know about this */ 845 if (ps.want_blank) /* put a blank before '{' if '{' is not at 846 * start of line */ 847 *e_code++ = ' '; 848 ps.want_blank = false; 849 *e_code++ = '{'; 850 ps.just_saw_decl = 0; 851 break; 852 853 case rbrace: /* got a '}' */ 854 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 855 * omitted in 856 * declarations */ 857 parse(semicolon); 858 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 859 diag2(1, "Unbalanced parens"); 860 ps.p_l_follow = 0; 861 sp_sw = false; 862 } 863 ps.just_saw_decl = 0; 864 ps.block_init_level--; 865 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 866 * line */ 867 if (verbose) 868 diag2(0, "Line broken"); 869 dump_line(); 870 } 871 *e_code++ = '}'; 872 ps.want_blank = true; 873 ps.in_stmt = ps.ind_stmt = false; 874 if (ps.dec_nest > 0) { /* we are in multi-level structure 875 * declaration */ 876 dec_ind = di_stack[--ps.dec_nest]; 877 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 878 ps.just_saw_decl = 2; 879 ps.in_decl = true; 880 } 881 prefix_blankline_requested = 0; 882 parse(rbrace); /* let parser know about this */ 883 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead 884 && ps.il[ps.tos] >= ps.ind_level; 885 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) 886 postfix_blankline_requested = 1; 887 break; 888 889 case swstmt: /* got keyword "switch" */ 890 sp_sw = true; 891 hd_type = swstmt; /* keep this for when we have seen the 892 * expression */ 893 goto copy_id; /* go move the token into buffer */ 894 895 case sp_paren: /* token is if, while, for */ 896 sp_sw = true; /* the interesting stuff is done after the 897 * expression is scanned */ 898 hd_type = (*token == 'i' ? ifstmt : 899 (*token == 'w' ? whilestmt : forstmt)); 900 901 /* 902 * remember the type of header for later use by parser 903 */ 904 goto copy_id; /* copy the token into line */ 905 906 case sp_nparen: /* got else, do */ 907 ps.in_stmt = false; 908 if (*token == 'e') { 909 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { 910 if (verbose) 911 diag2(0, "Line broken"); 912 dump_line();/* make sure this starts a line */ 913 ps.want_blank = false; 914 } 915 force_nl = true;/* also, following stuff must go onto new line */ 916 last_else = 1; 917 parse(elselit); 918 } 919 else { 920 if (e_code != s_code) { /* make sure this starts a line */ 921 if (verbose) 922 diag2(0, "Line broken"); 923 dump_line(); 924 ps.want_blank = false; 925 } 926 force_nl = true;/* also, following stuff must go onto new line */ 927 last_else = 0; 928 parse(dolit); 929 } 930 goto copy_id; /* move the token into line */ 931 932 case storage: 933 prefix_blankline_requested = 0; 934 goto copy_id; 935 936 case decl: /* we have a declaration type (int, etc.) */ 937 parse(decl); /* let parser worry about indentation */ 938 if (ps.last_token == rparen && ps.tos <= 1) { 939 if (s_code != e_code) { 940 dump_line(); 941 ps.want_blank = 0; 942 } 943 } 944 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { 945 ps.ind_level = ps.i_l_follow = 1; 946 ps.ind_stmt = 0; 947 } 948 ps.in_or_st = true; /* this might be a structure or initialization 949 * declaration */ 950 ps.in_decl = ps.decl_on_line = true; 951 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 952 ps.just_saw_decl = 2; 953 prefix_blankline_requested = 0; 954 for (i = 0; token[i++];); /* get length of token */ 955 956 if (ps.ind_level == 0 || ps.dec_nest > 0) { 957 /* global variable or struct member in local variable */ 958 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; 959 tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); 960 } else { 961 /* local variable */ 962 dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; 963 tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); 964 } 965 goto copy_id; 966 967 case funcname: 968 case ident: /* got an identifier or constant */ 969 if (ps.in_decl) { /* if we are in a declaration, we must indent 970 * identifier */ 971 if (type_code != funcname || !procnames_start_line) { 972 if (!ps.block_init && !ps.dumped_decl_indent) { 973 if (troff) { 974 if (ps.want_blank) 975 *e_code++ = ' '; 976 sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); 977 e_code += strlen(e_code); 978 } else 979 indent_declaration(dec_ind, tabs_to_var); 980 ps.dumped_decl_indent = true; 981 ps.want_blank = false; 982 } 983 } else { 984 if (ps.want_blank && !(procnames_start_line && 985 type_code == funcname)) 986 *e_code++ = ' '; 987 ps.want_blank = false; 988 if (dec_ind && s_code != e_code) { 989 *e_code = '\0'; 990 dump_line(); 991 } 992 dec_ind = 0; 993 } 994 } 995 else if (sp_sw && ps.p_l_follow == 0) { 996 sp_sw = false; 997 force_nl = true; 998 ps.last_u_d = true; 999 ps.in_stmt = false; 1000 parse(hd_type); 1001 } 1002 copy_id: 1003 if (ps.want_blank) 1004 *e_code++ = ' '; 1005 if (troff && ps.keyword) { 1006 e_code = chfont(&bodyf, &keywordf, e_code); 1007 for (t_ptr = token; *t_ptr; ++t_ptr) { 1008 CHECK_SIZE_CODE; 1009 *e_code++ = keywordf.allcaps && islower(*t_ptr) 1010 ? toupper(*t_ptr) : *t_ptr; 1011 } 1012 e_code = chfont(&keywordf, &bodyf, e_code); 1013 } 1014 else 1015 for (t_ptr = token; *t_ptr; ++t_ptr) { 1016 CHECK_SIZE_CODE; 1017 *e_code++ = *t_ptr; 1018 } 1019 if (type_code != funcname) 1020 ps.want_blank = true; 1021 break; 1022 1023 case strpfx: 1024 if (ps.want_blank) 1025 *e_code++ = ' '; 1026 for (t_ptr = token; *t_ptr; ++t_ptr) { 1027 CHECK_SIZE_CODE; 1028 *e_code++ = *t_ptr; 1029 } 1030 ps.want_blank = false; 1031 break; 1032 1033 case period: /* treat a period kind of like a binary 1034 * operation */ 1035 *e_code++ = '.'; /* move the period into line */ 1036 ps.want_blank = false; /* dont put a blank after a period */ 1037 break; 1038 1039 case comma: 1040 ps.want_blank = (s_code != e_code); /* only put blank after comma 1041 * if comma does not start the 1042 * line */ 1043 if (ps.in_decl && is_procname == 0 && !ps.block_init && 1044 !ps.dumped_decl_indent) { 1045 /* indent leading commas and not the actual identifiers */ 1046 indent_declaration(dec_ind - 1, tabs_to_var); 1047 ps.dumped_decl_indent = true; 1048 } 1049 *e_code++ = ','; 1050 if (ps.p_l_follow == 0) { 1051 if (ps.block_init_level <= 0) 1052 ps.block_init = 0; 1053 if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) 1054 force_nl = true; 1055 } 1056 break; 1057 1058 case preesc: /* got the character '#' */ 1059 if ((s_com != e_com) || 1060 (s_lab != e_lab) || 1061 (s_code != e_code)) 1062 dump_line(); 1063 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1064 { 1065 int in_comment = 0; 1066 int com_start = 0; 1067 char quote = 0; 1068 int com_end = 0; 1069 1070 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1071 buf_ptr++; 1072 if (buf_ptr >= buf_end) 1073 fill_buffer(); 1074 } 1075 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1076 CHECK_SIZE_LAB; 1077 *e_lab = *buf_ptr++; 1078 if (buf_ptr >= buf_end) 1079 fill_buffer(); 1080 switch (*e_lab++) { 1081 case BACKSLASH: 1082 if (troff) 1083 *e_lab++ = BACKSLASH; 1084 if (!in_comment) { 1085 *e_lab++ = *buf_ptr++; 1086 if (buf_ptr >= buf_end) 1087 fill_buffer(); 1088 } 1089 break; 1090 case '/': 1091 if (*buf_ptr == '*' && !in_comment && !quote) { 1092 in_comment = 1; 1093 *e_lab++ = *buf_ptr++; 1094 com_start = e_lab - s_lab - 2; 1095 } 1096 break; 1097 case '"': 1098 if (quote == '"') 1099 quote = 0; 1100 break; 1101 case '\'': 1102 if (quote == '\'') 1103 quote = 0; 1104 break; 1105 case '*': 1106 if (*buf_ptr == '/' && in_comment) { 1107 in_comment = 0; 1108 *e_lab++ = *buf_ptr++; 1109 com_end = e_lab - s_lab; 1110 } 1111 break; 1112 } 1113 } 1114 1115 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1116 e_lab--; 1117 if (e_lab - s_lab == com_end && bp_save == NULL) { 1118 /* comment on preprocessor line */ 1119 if (sc_end == NULL) /* if this is the first comment, we 1120 * must set up the buffer */ 1121 sc_end = &(save_com[0]); 1122 else { 1123 *sc_end++ = '\n'; /* add newline between 1124 * comments */ 1125 *sc_end++ = ' '; 1126 --line_no; 1127 } 1128 bcopy(s_lab + com_start, sc_end, com_end - com_start); 1129 sc_end += com_end - com_start; 1130 if (sc_end >= &save_com[sc_size]) 1131 abort(); 1132 e_lab = s_lab + com_start; 1133 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1134 e_lab--; 1135 bp_save = buf_ptr; /* save current input buffer */ 1136 be_save = buf_end; 1137 buf_ptr = save_com; /* fix so that subsequent calls to 1138 * lexi will take tokens out of 1139 * save_com */ 1140 *sc_end++ = ' '; /* add trailing blank, just in case */ 1141 buf_end = sc_end; 1142 sc_end = NULL; 1143 } 1144 *e_lab = '\0'; /* null terminate line */ 1145 ps.pcase = false; 1146 } 1147 1148 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ 1149 if ((size_t)ifdef_level < nitems(state_stack)) { 1150 match_state[ifdef_level].tos = -1; 1151 state_stack[ifdef_level++] = ps; 1152 } 1153 else 1154 diag2(1, "#if stack overflow"); 1155 } 1156 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ 1157 if (ifdef_level <= 0) 1158 diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); 1159 else { 1160 match_state[ifdef_level - 1] = ps; 1161 ps = state_stack[ifdef_level - 1]; 1162 } 1163 } 1164 else if (strncmp(s_lab, "#endif", 6) == 0) { 1165 if (ifdef_level <= 0) 1166 diag2(1, "Unmatched #endif"); 1167 else 1168 ifdef_level--; 1169 } else { 1170 struct directives { 1171 int size; 1172 const char *string; 1173 } 1174 recognized[] = { 1175 {7, "include"}, 1176 {6, "define"}, 1177 {5, "undef"}, 1178 {4, "line"}, 1179 {5, "error"}, 1180 {6, "pragma"} 1181 }; 1182 int d = nitems(recognized); 1183 while (--d >= 0) 1184 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) 1185 break; 1186 if (d < 0) { 1187 diag2(1, "Unrecognized cpp directive"); 1188 break; 1189 } 1190 } 1191 if (blanklines_around_conditional_compilation) { 1192 postfix_blankline_requested++; 1193 n_real_blanklines = 0; 1194 } 1195 else { 1196 postfix_blankline_requested = 0; 1197 prefix_blankline_requested = 0; 1198 } 1199 break; /* subsequent processing of the newline 1200 * character will cause the line to be printed */ 1201 1202 case comment: /* we have gotten a / followed by * this is a biggie */ 1203 if (flushed_nl) { /* we should force a broken line here */ 1204 dump_line(); 1205 ps.want_blank = false; /* dont insert blank at line start */ 1206 force_nl = false; 1207 } 1208 pr_comment(); 1209 break; 1210 } /* end of big switch stmt */ 1211 1212 *e_code = '\0'; /* make sure code section is null terminated */ 1213 if (type_code != comment && type_code != newline && type_code != preesc) 1214 ps.last_token = type_code; 1215 } /* end of main while (1) loop */ 1216 } 1217 1218 /* 1219 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1220 * backup file will be ".Bfile" then make the backup file the input and 1221 * original input file the output 1222 */ 1223 static void 1224 bakcopy(void) 1225 { 1226 int n, 1227 bakchn; 1228 char buff[8 * 1024]; 1229 const char *p; 1230 1231 /* construct file name .Bfile */ 1232 for (p = in_name; *p; p++); /* skip to end of string */ 1233 while (p > in_name && *p != '/') /* find last '/' */ 1234 p--; 1235 if (*p == '/') 1236 p++; 1237 sprintf(bakfile, "%s.BAK", p); 1238 1239 /* copy in_name to backup file */ 1240 bakchn = creat(bakfile, 0600); 1241 if (bakchn < 0) 1242 err(1, "%s", bakfile); 1243 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 1244 if (write(bakchn, buff, n) != n) 1245 err(1, "%s", bakfile); 1246 if (n < 0) 1247 err(1, "%s", in_name); 1248 close(bakchn); 1249 fclose(input); 1250 1251 /* re-open backup file as the input file */ 1252 input = fopen(bakfile, "r"); 1253 if (input == NULL) 1254 err(1, "%s", bakfile); 1255 /* now the original input file will be the output */ 1256 output = fopen(in_name, "w"); 1257 if (output == NULL) { 1258 unlink(bakfile); 1259 err(1, "%s", in_name); 1260 } 1261 } 1262 1263 static void 1264 indent_declaration(int cur_dec_ind, int tabs_to_var) 1265 { 1266 int pos = e_code - s_code; 1267 char *startpos = e_code; 1268 1269 /* 1270 * get the tab math right for indentations that are not multiples of 8 1271 */ 1272 if ((ps.ind_level * ps.ind_size) % 8 != 0) { 1273 pos += (ps.ind_level * ps.ind_size) % 8; 1274 cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; 1275 } 1276 if (tabs_to_var) 1277 while ((pos & ~7) + 8 <= cur_dec_ind) { 1278 CHECK_SIZE_CODE; 1279 *e_code++ = '\t'; 1280 pos = (pos & ~7) + 8; 1281 } 1282 while (pos < cur_dec_ind) { 1283 CHECK_SIZE_CODE; 1284 *e_code++ = ' '; 1285 pos++; 1286 } 1287 if (e_code == startpos && ps.want_blank) { 1288 *e_code++ = ' '; 1289 ps.want_blank = false; 1290 } 1291 } 1292