xref: /freebsd/usr.bin/indent/indent.c (revision 7660b554bc59a07be0431c17e0e33815818baa69)
1 /*
2  * Copyright (c) 1985 Sun Microsystems, Inc.
3  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef lint
37 static const char copyright[] =	/* copyright notice as a static string; compiled only when lint is not defined */
38 "@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\
39 @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\
40 @(#) Copyright (c) 1980, 1993\n\
41 	The Regents of the University of California.  All rights reserved.\n";
42 #endif /* not lint */
43 
44 #if 0	/* disabled: the sccsid string below is never compiled */
45 #ifndef lint
46 static char sccsid[] = "@(#)indent.c	5.17 (Berkeley) 6/7/93";
47 #endif /* not lint */
48 #endif
49 
50 #include <sys/cdefs.h>
51 __FBSDID("$FreeBSD$");
52 
53 #include <sys/param.h>
54 #include <err.h>
55 #include <fcntl.h>
56 #include <unistd.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <ctype.h>
61 #include "indent_globs.h"
62 #include "indent_codes.h"
63 #include "indent.h"
64 
65 static void bakcopy(void);	/* defined outside this view; main() calls it when no output file was named, input is not stdin and troff is off */
66 
67 const char *in_name = "Standard Input";	/* name of the input file; main()
68 					 * points it at argv[i] when one is given */
69 const char *out_name = "Standard Output";	/* name of the output file; set from
70 						 * argv[i], or to in_name just before main() calls bakcopy() */
71 char        bakfile[MAXPATHLEN] = "";	/* starts empty; NOTE(review): presumably the backup path filled in by bakcopy() -- confirm */
72 
73 extern int  found_err;	/* flag set in diagN() on error; main() clears it at start and passes it to exit() at EOF */
74 
75 int
76 main(int argc, char **argv)
77 {
78 
79     int         dec_ind;	/* current indentation for declarations */
80     int         di_stack[20];	/* a stack of structure indentation levels */
81     int         flushed_nl;	/* used when buffering up comments to remember
82 				 * that a newline was passed over */
83     int         force_nl;	/* when true, code must be broken */
84     int         hd_type = 0;	/* used to store type of stmt for if (...),
85 				 * for (...), etc */
86     int i;		/* local loop counter */
87     int         scase;		/* set to true when we see a case, so we will
88 				 * know what to do with the following colon */
89     int         sp_sw;		/* when true, we are in the expression of
90 				 * if(...), while(...), etc. */
91     int         squest;		/* when this is positive, we have seen a ?
92 				 * without the matching : in a <c>?<s>:<s>
93 				 * construct */
94     const char *t_ptr;		/* used for copying tokens */
95     int         type_code;	/* the type of token, returned by lexi */
96 
97     int         last_else = 0;	/* true iff last keyword was an else */
98 
99 
100     /*-----------------------------------------------*\
101     |		      INITIALIZATION		      |
102     \*-----------------------------------------------*/
103 
104     found_err = 0;
105 
106     ps.p_stack[0] = stmt;	/* this is the parser's stack */
107     ps.last_nl = true;		/* this is true if the last thing scanned was
108 				 * a newline */
109     ps.last_token = semicolon;
110     combuf = (char *) malloc(bufsize);
111     if (combuf == NULL)
112 	err(1, NULL);
113     labbuf = (char *) malloc(bufsize);
114     if (labbuf == NULL)
115 	err(1, NULL);
116     codebuf = (char *) malloc(bufsize);
117     if (codebuf == NULL)
118 	err(1, NULL);
119     tokenbuf = (char *) malloc(bufsize);
120     if (tokenbuf == NULL)
121 	err(1, NULL);
122     l_com = combuf + bufsize - 5;
123     l_lab = labbuf + bufsize - 5;
124     l_code = codebuf + bufsize - 5;
125     l_token = tokenbuf + bufsize - 5;
126     combuf[0] = codebuf[0] = labbuf[0] = ' ';	/* set up code, label, and
127 						 * comment buffers */
128     combuf[1] = codebuf[1] = labbuf[1] = '\0';
129     ps.else_if = 1;		/* Default else-if special processing to on */
130     s_lab = e_lab = labbuf + 1;
131     s_code = e_code = codebuf + 1;
132     s_com = e_com = combuf + 1;
133     s_token = e_token = tokenbuf + 1;
134 
135     in_buffer = (char *) malloc(10);
136     if (in_buffer == NULL)
137 	err(1, NULL);
138     in_buffer_limit = in_buffer + 8;
139     buf_ptr = buf_end = in_buffer;
140     line_no = 1;
141     had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
142     sp_sw = force_nl = false;
143     ps.in_or_st = false;
144     ps.bl_line = true;
145     dec_ind = 0;
146     di_stack[ps.dec_nest = 0] = 0;
147     ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
148 
149     scase = ps.pcase = false;
150     squest = 0;
151     sc_end = 0;
152     bp_save = 0;
153     be_save = 0;
154 
155     output = 0;
156 
157     /*--------------------------------------------------*\
158     |   		COMMAND LINE SCAN		 |
159     \*--------------------------------------------------*/
160 
161 #ifdef undef
162     max_col = 78;		/* -l78 */
163     lineup_to_parens = 1;	/* -lp */
164     ps.ljust_decl = 0;		/* -ndj */
165     ps.com_ind = 33;		/* -c33 */
166     star_comment_cont = 1;	/* -sc */
167     ps.ind_size = 8;		/* -i8 */
168     verbose = 0;
169     ps.decl_indent = 16;	/* -di16 */
170     ps.indent_parameters = 1;	/* -ip */
171     ps.decl_com_ind = 0;	/* if this is not set to some positive value
172 				 * by an arg, we will set this equal to
173 				 * ps.com_ind */
174     btype_2 = 1;		/* -br */
175     cuddle_else = 1;		/* -ce */
176     ps.unindent_displace = 0;	/* -d0 */
177     ps.case_indent = 0;		/* -cli0 */
178     format_block_comments = 1;	/* -fcb */
179     format_col1_comments = 1;	/* -fc1 */
180     procnames_start_line = 1;	/* -psl */
181     proc_calls_space = 0;	/* -npcs */
182     comment_delimiter_on_blankline = 1;	/* -cdb */
183     ps.leave_comma = 1;		/* -nbc */
184 #endif
185 
186     for (i = 1; i < argc; ++i)
187 	if (strcmp(argv[i], "-npro") == 0)
188 	    break;
189     set_defaults();
190     if (i >= argc)
191 	set_profile();
192 
193     for (i = 1; i < argc; ++i) {
194 
195 	/*
196 	 * look thru args (if any) for changes to defaults
197 	 */
198 	if (argv[i][0] != '-') {/* no flag on parameter */
199 	    if (input == 0) {	/* we must have the input file */
200 		in_name = argv[i];	/* remember name of input file */
201 		input = fopen(in_name, "r");
202 		if (input == 0)		/* check for open error */
203 			err(1, "%s", in_name);
204 		continue;
205 	    }
206 	    else if (output == 0) {	/* we have the output file */
207 		out_name = argv[i];	/* remember name of output file */
208 		if (strcmp(in_name, out_name) == 0) {	/* attempt to overwrite
209 							 * the file */
210 		    errx(1, "input and output files must be different");
211 		}
212 		output = fopen(out_name, "w");
213 		if (output == 0)	/* check for create error */
214 			err(1, "%s", out_name);
215 		continue;
216 	    }
217 	    errx(1, "unknown parameter: %s", argv[i]);
218 	}
219 	else
220 	    set_option(argv[i]);
221     }				/* end of for */
222     if (input == 0)
223 	input = stdin;
224     if (output == 0) {
225 	if (troff || input == stdin)
226 	    output = stdout;
227 	else {
228 	    out_name = in_name;
229 	    bakcopy();
230 	}
231     }
232     if (ps.com_ind <= 1)
233 	ps.com_ind = 2;		/* dont put normal comments before column 2 */
234     if (troff) {
235 	if (bodyf.font[0] == 0)
236 	    parsefont(&bodyf, "R");
237 	if (scomf.font[0] == 0)
238 	    parsefont(&scomf, "I");
239 	if (blkcomf.font[0] == 0)
240 	    blkcomf = scomf, blkcomf.size += 2;
241 	if (boxcomf.font[0] == 0)
242 	    boxcomf = blkcomf;
243 	if (stringf.font[0] == 0)
244 	    parsefont(&stringf, "L");
245 	if (keywordf.font[0] == 0)
246 	    parsefont(&keywordf, "B");
247 	writefdef(&bodyf, 'B');
248 	writefdef(&scomf, 'C');
249 	writefdef(&blkcomf, 'L');
250 	writefdef(&boxcomf, 'X');
251 	writefdef(&stringf, 'S');
252 	writefdef(&keywordf, 'K');
253     }
254     if (block_comment_max_col <= 0)
255 	block_comment_max_col = max_col;
256     if (ps.decl_com_ind <= 0)	/* if not specified by user, set this */
257 	ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind;
258     if (continuation_indent == 0)
259 	continuation_indent = ps.ind_size;
260     fill_buffer();		/* get first batch of stuff into input buffer */
261 
262     parse(semicolon);
263     {
264 	char *p = buf_ptr;
265 	int col = 1;
266 
267 	while (1) {
268 	    if (*p == ' ')
269 		col++;
270 	    else if (*p == '\t')
271 		col = ((col - 1) & ~7) + 9;
272 	    else
273 		break;
274 	    p++;
275 	}
276 	if (col > ps.ind_size)
277 	    ps.ind_level = ps.i_l_follow = col / ps.ind_size;
278     }
279     if (troff) {
280 	const char *p = in_name,
281 	           *beg = in_name;
282 
283 	while (*p)
284 	    if (*p++ == '/')
285 		beg = p;
286 	fprintf(output, ".Fn \"%s\"\n", beg);
287     }
288     /*
289      * START OF MAIN LOOP
290      */
291 
292     while (1) {			/* this is the main loop.  it will go until we
293 				 * reach eof */
294 	int         is_procname;
295 
296 	type_code = lexi();	/* lexi reads one token.  The actual
297 				 * characters read are stored in "token". lexi
298 				 * returns a code indicating the type of token */
299 	is_procname = ps.procname[0];
300 
301 	/*
302 	 * The following code moves everything following an if (), while (),
303 	 * else, etc. up to the start of the following stmt to a buffer. This
304 	 * allows proper handling of both kinds of brace placement.
305 	 */
306 
307 	flushed_nl = false;
308 	while (ps.search_brace) {	/* if we scanned an if(), while(),
309 					 * etc., we might need to copy stuff
310 					 * into a buffer we must loop, copying
311 					 * stuff into save_com, until we find
312 					 * the start of the stmt which follows
313 					 * the if, or whatever */
314 	    switch (type_code) {
315 	    case newline:
316 		++line_no;
317 		flushed_nl = true;
318 	    case form_feed:
319 		break;		/* form feeds and newlines found here will be
320 				 * ignored */
321 
322 	    case lbrace:	/* this is a brace that starts the compound
323 				 * stmt */
324 		if (sc_end == 0) {	/* ignore buffering if a comment wasnt
325 					 * stored up */
326 		    ps.search_brace = false;
327 		    goto check_type;
328 		}
329 		if (btype_2) {
330 		    save_com[0] = '{';	/* we either want to put the brace
331 					 * right after the if */
332 		    goto sw_buffer;	/* go to common code to get out of
333 					 * this loop */
334 		}
335 	    case comment:	/* we have a comment, so we must copy it into
336 				 * the buffer */
337 		if (!flushed_nl || sc_end != 0) {
338 		    if (sc_end == 0) {	/* if this is the first comment, we
339 					 * must set up the buffer */
340 			save_com[0] = save_com[1] = ' ';
341 			sc_end = &(save_com[2]);
342 		    }
343 		    else {
344 			*sc_end++ = '\n';	/* add newline between
345 						 * comments */
346 			*sc_end++ = ' ';
347 			--line_no;
348 		    }
349 		    *sc_end++ = '/';	/* copy in start of comment */
350 		    *sc_end++ = '*';
351 
352 		    for (;;) {	/* loop until we get to the end of the comment */
353 			*sc_end = *buf_ptr++;
354 			if (buf_ptr >= buf_end)
355 			    fill_buffer();
356 
357 			if (*sc_end++ == '*' && *buf_ptr == '/')
358 			    break;	/* we are at end of comment */
359 
360 			if (sc_end >= &(save_com[sc_size])) {	/* check for temp buffer
361 								 * overflow */
362 			    diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever");
363 			    fflush(output);
364 			    exit(1);
365 			}
366 		    }
367 		    *sc_end++ = '/';	/* add ending slash */
368 		    if (++buf_ptr >= buf_end)	/* get past / in buffer */
369 			fill_buffer();
370 		    break;
371 		}
372 	    default:		/* it is the start of a normal statement */
373 		if (flushed_nl)	/* if we flushed a newline, make sure it is
374 				 * put back */
375 		    force_nl = true;
376 		if ((type_code == sp_paren && *token == 'i'
377 			&& last_else && ps.else_if)
378 			|| (type_code == sp_nparen && *token == 'e'
379 			&& e_code != s_code && e_code[-1] == '}'))
380 		    force_nl = false;
381 
382 		if (sc_end == 0) {	/* ignore buffering if comment wasnt
383 					 * saved up */
384 		    ps.search_brace = false;
385 		    goto check_type;
386 		}
387 		if (force_nl) {	/* if we should insert a nl here, put it into
388 				 * the buffer */
389 		    force_nl = false;
390 		    --line_no;	/* this will be re-increased when the nl is
391 				 * read from the buffer */
392 		    *sc_end++ = '\n';
393 		    *sc_end++ = ' ';
394 		    if (verbose && !flushed_nl)	/* print error msg if the line
395 						 * was not already broken */
396 			diag2(0, "Line broken");
397 		    flushed_nl = false;
398 		}
399 		for (t_ptr = token; *t_ptr; ++t_ptr)
400 		    *sc_end++ = *t_ptr;	/* copy token into temp buffer */
401 		ps.procname[0] = 0;
402 
403 	sw_buffer:
404 		ps.search_brace = false;	/* stop looking for start of
405 						 * stmt */
406 		bp_save = buf_ptr;	/* save current input buffer */
407 		be_save = buf_end;
408 		buf_ptr = save_com;	/* fix so that subsequent calls to
409 					 * lexi will take tokens out of
410 					 * save_com */
411 		*sc_end++ = ' ';/* add trailing blank, just in case */
412 		buf_end = sc_end;
413 		sc_end = 0;
414 		break;
415 	    }			/* end of switch */
416 	    if (type_code != 0)	/* we must make this check, just in case there
417 				 * was an unexpected EOF */
418 		type_code = lexi();	/* read another token */
419 	    /* if (ps.search_brace) ps.procname[0] = 0; */
420 	    if ((is_procname = ps.procname[0]) && flushed_nl
421 		    && !procnames_start_line && ps.in_decl
422 		    && type_code == ident)
423 		flushed_nl = 0;
424 	}			/* end of while (search_brace) */
425 	last_else = 0;
426 check_type:
427 	if (type_code == 0) {	/* we got eof */
428 	    if (s_lab != e_lab || s_code != e_code
429 		    || s_com != e_com)	/* must dump end of line */
430 		dump_line();
431 	    if (ps.tos > 1)	/* check for balanced braces */
432 		diag2(1, "Stuff missing from end of file");
433 
434 	    if (verbose) {
435 		printf("There were %d output lines and %d comments\n",
436 		       ps.out_lines, ps.out_coms);
437 		printf("(Lines with comments)/(Lines with code): %6.3f\n",
438 		       (1.0 * ps.com_lines) / code_lines);
439 	    }
440 	    fflush(output);
441 	    exit(found_err);
442 	}
443 	if (
444 		(type_code != comment) &&
445 		(type_code != newline) &&
446 		(type_code != preesc) &&
447 		(type_code != form_feed)) {
448 	    if (force_nl &&
449 		    (type_code != semicolon) &&
450 		    (type_code != lbrace || !btype_2)) {
451 		/* we should force a broken line here */
452 		if (verbose && !flushed_nl)
453 		    diag2(0, "Line broken");
454 		flushed_nl = false;
455 		dump_line();
456 		ps.want_blank = false;	/* dont insert blank at line start */
457 		force_nl = false;
458 	    }
459 	    ps.in_stmt = true;	/* turn on flag which causes an extra level of
460 				 * indentation. this is turned off by a ; or
461 				 * '}' */
462 	    if (s_com != e_com) {	/* the turkey has embedded a comment
463 					 * in a line. fix it */
464 		*e_code++ = ' ';
465 		for (t_ptr = s_com; *t_ptr; ++t_ptr) {
466 		    CHECK_SIZE_CODE;
467 		    *e_code++ = *t_ptr;
468 		}
469 		*e_code++ = ' ';
470 		*e_code = '\0';	/* null terminate code sect */
471 		ps.want_blank = false;
472 		e_com = s_com;
473 	    }
474 	}
475 	else if (type_code != comment)	/* preserve force_nl thru a comment */
476 	    force_nl = false;	/* cancel forced newline after newline, form
477 				 * feed, etc */
478 
479 
480 
481 	/*-----------------------------------------------------*\
482 	|	   do switch on type of token scanned		|
483 	\*-----------------------------------------------------*/
484 	CHECK_SIZE_CODE;
485 	switch (type_code) {	/* now, decide what to do with the token */
486 
487 	case form_feed:	/* found a form feed in line */
488 	    ps.use_ff = true;	/* a form feed is treated much like a newline */
489 	    dump_line();
490 	    ps.want_blank = false;
491 	    break;
492 
493 	case newline:
494 	    if (ps.last_token != comma || ps.p_l_follow > 0
495 		    || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
496 		dump_line();
497 		ps.want_blank = false;
498 	    }
499 	    ++line_no;		/* keep track of input line number */
500 	    break;
501 
502 	case lparen:		/* got a '(' or '[' */
503 	    ++ps.p_l_follow;	/* count parens to make Healy happy */
504 	    if (ps.want_blank && *token != '[' &&
505 		    (ps.last_token != ident || proc_calls_space
506 	      || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon))))
507 		*e_code++ = ' ';
508 	    if (ps.in_decl && !ps.block_init)
509 		if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) {
510 		    ps.dumped_decl_indent = 1;
511 		    sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
512 		    e_code += strlen(e_code);
513 		}
514 		else {
515 		    while ((e_code - s_code) < dec_ind) {
516 			CHECK_SIZE_CODE;
517 			*e_code++ = ' ';
518 		    }
519 		    *e_code++ = token[0];
520 		}
521 	    else
522 		*e_code++ = token[0];
523 	    ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code;
524 	    if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent
525 		    && ps.paren_indents[0] < 2 * ps.ind_size)
526 		ps.paren_indents[0] = 2 * ps.ind_size;
527 	    ps.want_blank = false;
528 	    if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
529 		/*
530 		 * this is a kluge to make sure that declarations will be
531 		 * aligned right if proc decl has an explicit type on it, i.e.
532 		 * "int a(x) {..."
533 		 */
534 		parse(semicolon);	/* I said this was a kluge... */
535 		ps.in_or_st = false;	/* turn off flag for structure decl or
536 					 * initialization */
537 	    }
538 	    if (ps.sizeof_keyword)
539 		ps.sizeof_mask |= 1 << ps.p_l_follow;
540 	    break;
541 
542 	case rparen:		/* got a ')' or ']' */
543 	    rparen_count--;
544 	    if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) {
545 		ps.last_u_d = true;
546 		ps.cast_mask &= (1 << ps.p_l_follow) - 1;
547 		ps.want_blank = false;
548 	    } else
549 		ps.want_blank = true;
550 	    ps.sizeof_mask &= (1 << ps.p_l_follow) - 1;
551 	    if (--ps.p_l_follow < 0) {
552 		ps.p_l_follow = 0;
553 		diag3(0, "Extra %c", *token);
554 	    }
555 	    if (e_code == s_code)	/* if the paren starts the line */
556 		ps.paren_level = ps.p_l_follow;	/* then indent it */
557 
558 	    *e_code++ = token[0];
559 
560 	    if (sp_sw && (ps.p_l_follow == 0)) {	/* check for end of if
561 							 * (...), or some such */
562 		sp_sw = false;
563 		force_nl = true;/* must force newline after if */
564 		ps.last_u_d = true;	/* inform lexi that a following
565 					 * operator is unary */
566 		ps.in_stmt = false;	/* dont use stmt continuation
567 					 * indentation */
568 
569 		parse(hd_type);	/* let parser worry about if, or whatever */
570 	    }
571 	    ps.search_brace = btype_2;	/* this should insure that constructs
572 					 * such as main(){...} and int[]{...}
573 					 * have their braces put in the right
574 					 * place */
575 	    break;
576 
577 	case unary_op:		/* this could be any unary operation */
578 	    if (ps.want_blank)
579 		*e_code++ = ' ';
580 
581 	    if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) {
582 		sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
583 		ps.dumped_decl_indent = 1;
584 		e_code += strlen(e_code);
585 	    }
586 	    else {
587 		const char *res = token;
588 
589 		if (ps.in_decl && !ps.block_init) {	/* if this is a unary op
590 							 * in a declaration, we
591 							 * should indent this
592 							 * token */
593 		    for (i = 0; token[i]; ++i);	/* find length of token */
594 		    while ((e_code - s_code) < (dec_ind - i)) {
595 			CHECK_SIZE_CODE;
596 			*e_code++ = ' ';	/* pad it */
597 		    }
598 		}
599 		if (troff && token[0] == '-' && token[1] == '>')
600 		    res = "\\(->";
601 		for (t_ptr = res; *t_ptr; ++t_ptr) {
602 		    CHECK_SIZE_CODE;
603 		    *e_code++ = *t_ptr;
604 		}
605 	    }
606 	    ps.want_blank = false;
607 	    break;
608 
609 	case binary_op:	/* any binary operation */
610 	    if (ps.want_blank)
611 		*e_code++ = ' ';
612 	    {
613 		const char *res = token;
614 
615 		if (troff)
616 		    switch (token[0]) {
617 		    case '<':
618 			if (token[1] == '=')
619 			    res = "\\(<=";
620 			break;
621 		    case '>':
622 			if (token[1] == '=')
623 			    res = "\\(>=";
624 			break;
625 		    case '!':
626 			if (token[1] == '=')
627 			    res = "\\(!=";
628 			break;
629 		    case '|':
630 			if (token[1] == '|')
631 			    res = "\\(br\\(br";
632 			else if (token[1] == 0)
633 			    res = "\\(br";
634 			break;
635 		    }
636 		for (t_ptr = res; *t_ptr; ++t_ptr) {
637 		    CHECK_SIZE_CODE;
638 		    *e_code++ = *t_ptr;	/* move the operator */
639 		}
640 	    }
641 	    ps.want_blank = true;
642 	    break;
643 
644 	case postop:		/* got a trailing ++ or -- */
645 	    *e_code++ = token[0];
646 	    *e_code++ = token[1];
647 	    ps.want_blank = true;
648 	    break;
649 
650 	case question:		/* got a ? */
651 	    squest++;		/* this will be used when a later colon
652 				 * appears so we can distinguish the
653 				 * <c>?<n>:<n> construct */
654 	    if (ps.want_blank)
655 		*e_code++ = ' ';
656 	    *e_code++ = '?';
657 	    ps.want_blank = true;
658 	    break;
659 
660 	case casestmt:		/* got word 'case' or 'default' */
661 	    scase = true;	/* so we can process the later colon properly */
662 	    goto copy_id;
663 
664 	case colon:		/* got a ':' */
665 	    if (squest > 0) {	/* it is part of the <c>?<n>: <n> construct */
666 		--squest;
667 		if (ps.want_blank)
668 		    *e_code++ = ' ';
669 		*e_code++ = ':';
670 		ps.want_blank = true;
671 		break;
672 	    }
673 	    if (ps.in_decl) {
674 		*e_code++ = ':';
675 		ps.want_blank = false;
676 		break;
677 	    }
678 	    ps.in_stmt = false;	/* seeing a label does not imply we are in a
679 				 * stmt */
680 	    for (t_ptr = s_code; *t_ptr; ++t_ptr)
681 		*e_lab++ = *t_ptr;	/* turn everything so far into a label */
682 	    e_code = s_code;
683 	    *e_lab++ = ':';
684 	    *e_lab++ = ' ';
685 	    *e_lab = '\0';
686 
687 	    force_nl = ps.pcase = scase;	/* ps.pcase will be used by
688 						 * dump_line to decide how to
689 						 * indent the label. force_nl
690 						 * will force a case n: to be
691 						 * on a line by itself */
692 	    scase = false;
693 	    ps.want_blank = false;
694 	    break;
695 
696 	case semicolon:	/* got a ';' */
697 	    ps.in_or_st = false;/* we are not in an initialization or
698 				 * structure declaration */
699 	    scase = false;	/* these will only need resetting in an error */
700 	    squest = 0;
701 	    if (ps.last_token == rparen && rparen_count == 0)
702 		ps.in_parameter_declaration = 0;
703 	    ps.cast_mask = 0;
704 	    ps.sizeof_mask = 0;
705 	    ps.block_init = 0;
706 	    ps.block_init_level = 0;
707 	    ps.just_saw_decl--;
708 
709 	    if (ps.in_decl && s_code == e_code && !ps.block_init)
710 		while ((e_code - s_code) < (dec_ind - 1)) {
711 		    CHECK_SIZE_CODE;
712 		    *e_code++ = ' ';
713 		}
714 
715 	    ps.in_decl = (ps.dec_nest > 0);	/* if we were in a first level
716 						 * structure declaration, we
717 						 * arent any more */
718 
719 	    if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
720 
721 		/*
722 		 * This should be true iff there were unbalanced parens in the
723 		 * stmt.  It is a bit complicated, because the semicolon might
724 		 * be in a for stmt
725 		 */
726 		diag2(1, "Unbalanced parens");
727 		ps.p_l_follow = 0;
728 		if (sp_sw) {	/* this is a check for an if, while, etc. with
729 				 * unbalanced parens */
730 		    sp_sw = false;
731 		    parse(hd_type);	/* dont lose the if, or whatever */
732 		}
733 	    }
734 	    *e_code++ = ';';
735 	    ps.want_blank = true;
736 	    ps.in_stmt = (ps.p_l_follow > 0);	/* we are no longer in the
737 						 * middle of a stmt */
738 
739 	    if (!sp_sw) {	/* if not if for (;;) */
740 		parse(semicolon);	/* let parser know about end of stmt */
741 		force_nl = true;/* force newline after an end of stmt */
742 	    }
743 	    break;
744 
745 	case lbrace:		/* got a '{' */
746 	    ps.in_stmt = false;	/* dont indent the {} */
747 	    if (!ps.block_init)
748 		force_nl = true;/* force other stuff on same line as '{' onto
749 				 * new line */
750 	    else if (ps.block_init_level <= 0)
751 		ps.block_init_level = 1;
752 	    else
753 		ps.block_init_level++;
754 
755 	    if (s_code != e_code && !ps.block_init) {
756 		if (!btype_2) {
757 		    dump_line();
758 		    ps.want_blank = false;
759 		}
760 		else if (ps.in_parameter_declaration && !ps.in_or_st) {
761 		    ps.i_l_follow = 0;
762 		    dump_line();
763 		    ps.want_blank = false;
764 		}
765 	    }
766 	    if (ps.in_parameter_declaration)
767 		prefix_blankline_requested = 0;
768 
769 	    if (ps.p_l_follow > 0) {	/* check for preceding unbalanced
770 					 * parens */
771 		diag2(1, "Unbalanced parens");
772 		ps.p_l_follow = 0;
773 		if (sp_sw) {	/* check for unclosed if, for, etc. */
774 		    sp_sw = false;
775 		    parse(hd_type);
776 		    ps.ind_level = ps.i_l_follow;
777 		}
778 	    }
779 	    if (s_code == e_code)
780 		ps.ind_stmt = false;	/* dont put extra indentation on line
781 					 * with '{' */
782 	    if (ps.in_decl && ps.in_or_st) {	/* this is either a structure
783 						 * declaration or an init */
784 		di_stack[ps.dec_nest++] = dec_ind;
785 		/* ?		dec_ind = 0; */
786 	    }
787 	    else {
788 		ps.decl_on_line = false;	/* we cant be in the middle of
789 						 * a declaration, so dont do
790 						 * special indentation of
791 						 * comments */
792 		if (blanklines_after_declarations_at_proctop
793 			&& ps.in_parameter_declaration)
794 		    postfix_blankline_requested = 1;
795 		ps.in_parameter_declaration = 0;
796 	    }
797 	    dec_ind = 0;
798 	    parse(lbrace);	/* let parser know about this */
799 	    if (ps.want_blank)	/* put a blank before '{' if '{' is not at
800 				 * start of line */
801 		*e_code++ = ' ';
802 	    ps.want_blank = false;
803 	    *e_code++ = '{';
804 	    ps.just_saw_decl = 0;
805 	    break;
806 
807 	case rbrace:		/* got a '}' */
808 	    if (ps.p_stack[ps.tos] == decl && !ps.block_init)	/* semicolons can be
809 								 * omitted in
810 								 * declarations */
811 		parse(semicolon);
812 	    if (ps.p_l_follow) {/* check for unclosed if, for, else. */
813 		diag2(1, "Unbalanced parens");
814 		ps.p_l_follow = 0;
815 		sp_sw = false;
816 	    }
817 	    ps.just_saw_decl = 0;
818 	    ps.block_init_level--;
819 	    if (s_code != e_code && !ps.block_init) {	/* '}' must be first on
820 							 * line */
821 		if (verbose)
822 		    diag2(0, "Line broken");
823 		dump_line();
824 	    }
825 	    *e_code++ = '}';
826 	    ps.want_blank = true;
827 	    ps.in_stmt = ps.ind_stmt = false;
828 	    if (ps.dec_nest > 0) {	/* we are in multi-level structure
829 					 * declaration */
830 		dec_ind = di_stack[--ps.dec_nest];
831 		if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
832 		    ps.just_saw_decl = 2;
833 		ps.in_decl = true;
834 	    }
835 	    prefix_blankline_requested = 0;
836 	    parse(rbrace);	/* let parser know about this */
837 	    ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead
838 		&& ps.il[ps.tos] >= ps.ind_level;
839 	    if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0)
840 		postfix_blankline_requested = 1;
841 	    break;
842 
843 	case swstmt:		/* got keyword "switch" */
844 	    sp_sw = true;
845 	    hd_type = swstmt;	/* keep this for when we have seen the
846 				 * expression */
847 	    goto copy_id;	/* go move the token into buffer */
848 
849 	case sp_paren:		/* token is if, while, for */
850 	    sp_sw = true;	/* the interesting stuff is done after the
851 				 * expression is scanned */
852 	    hd_type = (*token == 'i' ? ifstmt :
853 		       (*token == 'w' ? whilestmt : forstmt));
854 
855 	    /*
856 	     * remember the type of header for later use by parser
857 	     */
858 	    goto copy_id;	/* copy the token into line */
859 
860 	case sp_nparen:	/* got else, do */
861 	    ps.in_stmt = false;
862 	    if (*token == 'e') {
863 		if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) {
864 		    if (verbose)
865 			diag2(0, "Line broken");
866 		    dump_line();/* make sure this starts a line */
867 		    ps.want_blank = false;
868 		}
869 		force_nl = true;/* also, following stuff must go onto new line */
870 		last_else = 1;
871 		parse(elselit);
872 	    }
873 	    else {
874 		if (e_code != s_code) {	/* make sure this starts a line */
875 		    if (verbose)
876 			diag2(0, "Line broken");
877 		    dump_line();
878 		    ps.want_blank = false;
879 		}
880 		force_nl = true;/* also, following stuff must go onto new line */
881 		last_else = 0;
882 		parse(dolit);
883 	    }
884 	    goto copy_id;	/* move the token into line */
885 
886 	case decl:		/* we have a declaration type (int, register,
887 				 * etc.) */
888 	    parse(decl);	/* let parser worry about indentation */
889 	    if (ps.last_token == rparen && ps.tos <= 1) {
890 		ps.in_parameter_declaration = 1;
891 		if (s_code != e_code) {
892 		    dump_line();
893 		    ps.want_blank = 0;
894 		}
895 	    }
896 	    if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) {
897 		ps.ind_level = ps.i_l_follow = 1;
898 		ps.ind_stmt = 0;
899 	    }
900 	    ps.in_or_st = true;	/* this might be a structure or initialization
901 				 * declaration */
902 	    ps.in_decl = ps.decl_on_line = true;
903 	    if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
904 		ps.just_saw_decl = 2;
905 	    prefix_blankline_requested = 0;
906 	    for (i = 0; token[i++];);	/* get length of token */
907 
908 	    /*
909 	     * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent
910 	     * : i);
911 	     */
912 	    dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i;
913 	    goto copy_id;
914 
915 	case ident:		/* got an identifier or constant */
916 	    if (ps.in_decl) {	/* if we are in a declaration, we must indent
917 				 * identifier */
918 		if (ps.want_blank)
919 		    *e_code++ = ' ';
920 		ps.want_blank = false;
921 		if (is_procname == 0 || !procnames_start_line) {
922 		    if (!ps.block_init) {
923 			if (troff && !ps.dumped_decl_indent) {
924 			    sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7);
925 			    ps.dumped_decl_indent = 1;
926 			    e_code += strlen(e_code);
927 			} else {
928 			    while ((e_code - s_code) < dec_ind) {
929 				CHECK_SIZE_CODE;
930 				*e_code++ = ' ';
931 			    }
932 			}
933 		    }
934 		} else {
935 		    if (dec_ind && s_code != e_code)
936 			dump_line();
937 		    dec_ind = 0;
938 		    ps.want_blank = false;
939 		}
940 	    }
941 	    else if (sp_sw && ps.p_l_follow == 0) {
942 		sp_sw = false;
943 		force_nl = true;
944 		ps.last_u_d = true;
945 		ps.in_stmt = false;
946 		parse(hd_type);
947 	    }
948     copy_id:
949 	    if (ps.want_blank)
950 		*e_code++ = ' ';
951 	    if (troff && ps.its_a_keyword) {
952 		e_code = chfont(&bodyf, &keywordf, e_code);
953 		for (t_ptr = token; *t_ptr; ++t_ptr) {
954 		    CHECK_SIZE_CODE;
955 		    *e_code++ = keywordf.allcaps && islower(*t_ptr)
956 			? toupper(*t_ptr) : *t_ptr;
957 		}
958 		e_code = chfont(&keywordf, &bodyf, e_code);
959 	    }
960 	    else
961 		for (t_ptr = token; *t_ptr; ++t_ptr) {
962 		    CHECK_SIZE_CODE;
963 		    *e_code++ = *t_ptr;
964 		}
965 	    ps.want_blank = true;
966 	    break;
967 
968 	case period:		/* treat a period kind of like a binary
969 				 * operation */
970 	    *e_code++ = '.';	/* move the period into line */
971 	    ps.want_blank = false;	/* dont put a blank after a period */
972 	    break;
973 
974 	case comma:
975 	    ps.want_blank = (s_code != e_code);	/* only put blank after comma
976 						 * if comma does not start the
977 						 * line */
978 	    if (ps.in_decl && is_procname == 0 && !ps.block_init)
979 		while ((e_code - s_code) < (dec_ind - 1)) {
980 		    CHECK_SIZE_CODE;
981 		    *e_code++ = ' ';
982 		}
983 
984 	    *e_code++ = ',';
985 	    if (ps.p_l_follow == 0) {
986 		if (ps.block_init_level <= 0)
987 		    ps.block_init = 0;
988 		if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8))
989 		    force_nl = true;
990 	    }
991 	    break;
992 
993 	case preesc:		/* got the character '#' */
994 	    if ((s_com != e_com) ||
995 		    (s_lab != e_lab) ||
996 		    (s_code != e_code))
997 		dump_line();
998 	    *e_lab++ = '#';	/* move whole line to 'label' buffer */
999 	    {
1000 		int         in_comment = 0;
1001 		int         com_start = 0;
1002 		char        quote = 0;
1003 		int         com_end = 0;
1004 
1005 		while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1006 		    buf_ptr++;
1007 		    if (buf_ptr >= buf_end)
1008 			fill_buffer();
1009 		}
1010 		while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
1011 		    CHECK_SIZE_LAB;
1012 		    *e_lab = *buf_ptr++;
1013 		    if (buf_ptr >= buf_end)
1014 			fill_buffer();
1015 		    switch (*e_lab++) {
1016 		    case BACKSLASH:
1017 			if (troff)
1018 			    *e_lab++ = BACKSLASH;
1019 			if (!in_comment) {
1020 			    *e_lab++ = *buf_ptr++;
1021 			    if (buf_ptr >= buf_end)
1022 				fill_buffer();
1023 			}
1024 			break;
1025 		    case '/':
1026 			if (*buf_ptr == '*' && !in_comment && !quote) {
1027 			    in_comment = 1;
1028 			    *e_lab++ = *buf_ptr++;
1029 			    com_start = e_lab - s_lab - 2;
1030 			}
1031 			break;
1032 		    case '"':
1033 			if (quote == '"')
1034 			    quote = 0;
1035 			break;
1036 		    case '\'':
1037 			if (quote == '\'')
1038 			    quote = 0;
1039 			break;
1040 		    case '*':
1041 			if (*buf_ptr == '/' && in_comment) {
1042 			    in_comment = 0;
1043 			    *e_lab++ = *buf_ptr++;
1044 			    com_end = e_lab - s_lab;
1045 			}
1046 			break;
1047 		    }
1048 		}
1049 
1050 		while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1051 		    e_lab--;
1052 		if (e_lab - s_lab == com_end && bp_save == 0) {	/* comment on
1053 								 * preprocessor line */
1054 		    if (sc_end == 0)	/* if this is the first comment, we
1055 					 * must set up the buffer */
1056 			sc_end = &(save_com[0]);
1057 		    else {
1058 			*sc_end++ = '\n';	/* add newline between
1059 						 * comments */
1060 			*sc_end++ = ' ';
1061 			--line_no;
1062 		    }
1063 		    bcopy(s_lab + com_start, sc_end, com_end - com_start);
1064 		    sc_end += com_end - com_start;
1065 		    if (sc_end >= &save_com[sc_size])
1066 			abort();
1067 		    e_lab = s_lab + com_start;
1068 		    while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1069 			e_lab--;
1070 		    bp_save = buf_ptr;	/* save current input buffer */
1071 		    be_save = buf_end;
1072 		    buf_ptr = save_com;	/* fix so that subsequent calls to
1073 					 * lexi will take tokens out of
1074 					 * save_com */
1075 		    *sc_end++ = ' ';	/* add trailing blank, just in case */
1076 		    buf_end = sc_end;
1077 		    sc_end = 0;
1078 		}
1079 		*e_lab = '\0';	/* null terminate line */
1080 		ps.pcase = false;
1081 	    }
1082 
1083 	    if (strncmp(s_lab, "#if", 3) == 0) {
1084 		if (blanklines_around_conditional_compilation) {
1085 		    int c;
1086 		    prefix_blankline_requested++;
1087 		    while ((c = getc(input)) == '\n');
1088 		    ungetc(c, input);
1089 		}
1090 		if ((size_t)ifdef_level < sizeof(state_stack)/sizeof(state_stack[0])) {
1091 		    match_state[ifdef_level].tos = -1;
1092 		    state_stack[ifdef_level++] = ps;
1093 		}
1094 		else
1095 		    diag2(1, "#if stack overflow");
1096 	    }
1097 	    else if (strncmp(s_lab, "#else", 5) == 0)
1098 		if (ifdef_level <= 0)
1099 		    diag2(1, "Unmatched #else");
1100 		else {
1101 		    match_state[ifdef_level - 1] = ps;
1102 		    ps = state_stack[ifdef_level - 1];
1103 		}
1104 	    else if (strncmp(s_lab, "#endif", 6) == 0) {
1105 		if (ifdef_level <= 0)
1106 		    diag2(1, "Unmatched #endif");
1107 		else {
1108 		    ifdef_level--;
1109 
1110 #ifdef undef
1111 		    /*
1112 		     * This match needs to be more intelligent before the
1113 		     * message is useful
1114 		     */
1115 		    if (match_state[ifdef_level].tos >= 0
1116 			  && bcmp(&ps, &match_state[ifdef_level], sizeof ps))
1117 			diag2(0, "Syntactically inconsistent #ifdef alternatives");
1118 #endif
1119 		}
1120 		if (blanklines_around_conditional_compilation) {
1121 		    postfix_blankline_requested++;
1122 		    n_real_blanklines = 0;
1123 		}
1124 	    }
1125 	    break;		/* subsequent processing of the newline
1126 				 * character will cause the line to be printed */
1127 
1128 	case comment:		/* we have gotten a / followed by * this is a biggie */
1129 	    if (flushed_nl) {	/* we should force a broken line here */
1130 		flushed_nl = false;
1131 		dump_line();
1132 		ps.want_blank = false;	/* dont insert blank at line start */
1133 		force_nl = false;
1134 	    }
1135 	    pr_comment();
1136 	    break;
1137 	}			/* end of big switch stmt */
1138 
1139 	*e_code = '\0';		/* make sure code section is null terminated */
1140 	if (type_code != comment && type_code != newline && type_code != preesc)
1141 	    ps.last_token = type_code;
1142     }				/* end of main while (1) loop */
1143 }
1144 
1145 /*
1146  * copy input file to backup file if in_name is /blah/blah/blah/file, then
1147  * backup file will be ".Bfile" then make the backup file the input and
1148  * original input file the output
1149  */
1150 static void
1151 bakcopy(void)
1152 {
1153     int         n,
1154                 bakchn;
1155     char        buff[8 * 1024];
1156     const char *p;
1157 
1158     /* construct file name .Bfile */
1159     for (p = in_name; *p; p++);	/* skip to end of string */
1160     while (p > in_name && *p != '/')	/* find last '/' */
1161 	p--;
1162     if (*p == '/')
1163 	p++;
1164     sprintf(bakfile, "%s.BAK", p);
1165 
1166     /* copy in_name to backup file */
1167     bakchn = creat(bakfile, 0600);
1168     if (bakchn < 0)
1169 	err(1, "%s", bakfile);
1170     while ((n = read(fileno(input), buff, sizeof buff)) != 0)
1171 	if (write(bakchn, buff, n) != n)
1172 	    err(1, "%s", bakfile);
1173     if (n < 0)
1174 	err(1, "%s", in_name);
1175     close(bakchn);
1176     fclose(input);
1177 
1178     /* re-open backup file as the input file */
1179     input = fopen(bakfile, "r");
1180     if (input == 0)
1181 	err(1, "%s", bakfile);
1182     /* now the original input file will be the output */
1183     output = fopen(in_name, "w");
1184     if (output == 0) {
1185 	unlink(bakfile);
1186 	err(1, "%s", in_name);
1187     }
1188 }
1189