xref: /freebsd/usr.bin/fmt/fmt.c (revision 3f8da92bc857c1d7c3f9397facf2863914da82d0)
1 /*
2  * Copyright (c) 1980, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 static char copyright[] =
36 "@(#) Copyright (c) 1980, 1993\n\
37 	The Regents of the University of California.  All rights reserved.\n";
38 #endif /* not lint */
39 
40 #ifndef lint
41 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
42 #endif /* not lint */
43 
#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
48 
49 /*
50  * fmt -- format the concatenation of input files or standard input
51  * onto standard output.  Designed for use with Mail ~|
52  *
53  * Syntax : fmt [ goal [ max ] ] [ name ... ]
54  * Authors: Kurt Shoens (UCB) 12/7/78;
55  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
56  */
57 
58 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
59  * #define	LENGTH	72		Max line length in output
60  */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/*
 * LIZ@UOM 6/18/85 -- New variables goal_length and max_length.
 * GOAL_LENGTH and MAX_LENGTH are the values main() installs before it
 * looks at the command line.
 */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */
int	center;			/* Nonzero once "-c" was seen: center lines */

/*
 * malloc() is declared by <stdlib.h>, which this file includes.  The old
 * "char *malloc();" redeclaration that used to sit here conflicts with
 * the standard prototype, so it is gone.
 */

/*
 * Header field names that prefix() looks for on the lines right after a
 * head line.  The list is terminated by a null pointer.
 */
char	*headnames[] = {"To", "Subject", "Cc", 0};
75 
76 /*
77  * Drive the whole formatter by managing input files.  Also,
78  * cause initialization of the output stuff and flush it out
79  * at the end.
80  */
81 
82 main(argc, argv)
83 	int argc;
84 	char **argv;
85 {
86 	register FILE *fi;
87 	register int errs = 0;
88 	int number;		/* LIZ@UOM 6/18/85 */
89 
90 	(void) setlocale(LC_CTYPE, "");
91 
92 	goal_length = GOAL_LENGTH;
93 	max_length = MAX_LENGTH;
94 	setout();
95 	lineno = 1;
96 	mark = -10;
97 	/*
98 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
99 	 */
100 	if (argc > 1 && !strcmp(argv[1], "-c")) {
101 		center++;
102 		argc--;
103 		argv++;
104 	}
105 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
106 		argv++;
107 		argc--;
108 		goal_length = number;
109 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
110 			argv++;
111 			argc--;
112 			max_length = number;
113 		}
114 	}
115 	if (max_length <= goal_length) {
116 		fprintf(stderr, "Max length must be greater than %s\n",
117 			"goal length");
118 		exit(1);
119 	}
120 	if (argc < 2) {
121 		fmt(stdin);
122 		oflush();
123 		exit(0);
124 	}
125 	while (--argc) {
126 		if ((fi = fopen(*++argv, "r")) == NULL) {
127 			perror(*argv);
128 			errs++;
129 			continue;
130 		}
131 		fmt(fi);
132 		fclose(fi);
133 	}
134 	oflush();
135 	exit(errs);
136 }
137 
138 /*
139  * Read up characters from the passed input file, forming lines,
140  * doing ^H processing, expanding tabs, stripping trailing blanks,
141  * and sending each line down for analysis.
142  */
143 fmt(fi)
144 	FILE *fi;
145 {
146 	static char *linebuf = 0, *canonb = 0;
147 	register char *cp, *cp2, cc;
148 	register int c, col;
149 #define CHUNKSIZE 1024
150 	static int lbufsize = 0, cbufsize = 0;
151 
152 	if (center) {
153 		linebuf = malloc(BUFSIZ);
154 		while (1) {
155 			cp = fgets(linebuf, BUFSIZ, fi);
156 			if (!cp)
157 				return;
158 			while (*cp && isspace(*cp))
159 				cp++;
160 			cp2 = cp + strlen(cp) - 1;
161 			while (cp2 > cp && isspace(*cp2))
162 				cp2--;
163 			if (cp == cp2)
164 				putchar('\n');
165 			col = cp2 - cp;
166 			for (c = 0; c < (goal_length-col)/2; c++)
167 				putchar(' ');
168 			while (cp <= cp2)
169 				putchar(*cp++);
170 			putchar('\n');
171 		}
172 	}
173 	c = getc(fi);
174 	while (c != EOF) {
175 		/*
176 		 * Collect a line, doing ^H processing.
177 		 * Leave tabs for now.
178 		 */
179 		cp = linebuf;
180 		while (c != '\n' && c != EOF) {
181 			if (cp - linebuf >= lbufsize) {
182 				int offset = cp - linebuf;
183 				lbufsize += CHUNKSIZE;
184 				linebuf = realloc(linebuf, lbufsize);
185 				if(linebuf == 0)
186 					abort();
187 				cp = linebuf + offset;
188 			}
189 			if (c == '\b') {
190 				if (cp > linebuf)
191 					cp--;
192 				c = getc(fi);
193 				continue;
194 			}
195 			if (!isprint(c) && c != '\t') {
196 				c = getc(fi);
197 				continue;
198 			}
199 			*cp++ = c;
200 			c = getc(fi);
201 		}
202 
203 		/*
204 		 * Toss anything remaining on the input line.
205 		 */
206 		while (c != '\n' && c != EOF)
207 			c = getc(fi);
208 
209 		if (cp != NULL) {
210 			*cp = '\0';
211 		} else {
212 			putchar('\n');
213 			c = getc(fi);
214 			continue;
215 		}
216 
217 		/*
218 		 * Expand tabs on the way to canonb.
219 		 */
220 		col = 0;
221 		cp = linebuf;
222 		cp2 = canonb;
223 		while (cc = *cp++) {
224 			if (cc != '\t') {
225 				col++;
226 				if (cp2 - canonb >= cbufsize) {
227 					int offset = cp2 - canonb;
228 					cbufsize += CHUNKSIZE;
229 					canonb = realloc(canonb, cbufsize);
230 					if(canonb == 0)
231 						abort();
232 					cp2 = canonb + offset;
233 				}
234 				*cp2++ = cc;
235 				continue;
236 			}
237 			do {
238 				if (cp2 - canonb >= cbufsize) {
239 					int offset = cp2 - canonb;
240 					cbufsize += CHUNKSIZE;
241 					canonb = realloc(canonb, cbufsize);
242 					if(canonb == 0)
243 						abort();
244 					cp2 = canonb + offset;
245 				}
246 				*cp2++ = ' ';
247 				col++;
248 			} while ((col & 07) != 0);
249 		}
250 
251 		/*
252 		 * Swipe trailing blanks from the line.
253 		 */
254 		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
255 			;
256 		*++cp2 = '\0';
257 		prefix(canonb);
258 		if (c != EOF)
259 			c = getc(fi);
260 	}
261 }
262 
263 /*
264  * Take a line devoid of tabs and other garbage and determine its
265  * blank prefix.  If the indent changes, call for a linebreak.
266  * If the input line is blank, echo the blank line on the output.
267  * Finally, if the line minus the prefix is a mail header, try to keep
268  * it on a line by itself.
269  */
270 prefix(line)
271 	char line[];
272 {
273 	register char *cp, **hp;
274 	register int np, h;
275 
276 	if (!*line) {
277 		oflush();
278 		putchar('\n');
279 		return;
280 	}
281 	for (cp = line; *cp == ' '; cp++)
282 		;
283 	np = cp - line;
284 
285 	/*
286 	 * The following horrible expression attempts to avoid linebreaks
287 	 * when the indent changes due to a paragraph.
288 	 */
289 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
290 		oflush();
291 	if (h = ishead(cp))
292 		oflush(), mark = lineno;
293 	if (lineno - mark < 3 && lineno - mark > 0)
294 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
295 			if (ispref(*hp, cp)) {
296 				h = 1;
297 				oflush();
298 				break;
299 			}
300 	if (!h && (h = (*cp == '.')))
301 		oflush();
302 	pfx = np;
303 	if (h)
304 		pack(cp, strlen(cp));
305 	else	split(cp);
306 	if (h)
307 		oflush();
308 	lineno++;
309 }
310 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * The length passed to pack() counts the characters of the word itself:
 * the blanks attached at the end are not counted, and a backslash
 * together with the white space character it escapes counts as one.
 *
 * The scratch buffer is sized from the line, so a word of any length
 * fits; if it cannot be allocated a message is printed and the program
 * exits with status 1.  The value returned is always 0; callers do not
 * use it.
 */
int
split(char line[])
{
	char *cp, *cp2, *word;
	int wordl;		/* LIZ@UOM 6/18/85 */

	/*
	 * Worst case is a line that is one single word: every character of
	 * it, the two blanks added after sentence punctuation, and the NUL.
	 */
	word = malloc(strlen(line) + 3);
	if (word == NULL) {
		fprintf(stderr, "fmt:  Ran out of memory\n");
		exit(1);
	}

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  cp[-1] is valid here: the enclosing
		 * loop was entered with *cp non-NUL, so cp has advanced.
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]) != NULL)
				*cp2++ = ' ';
		}
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
	free(word);
	return (0);
}
358 
/*
 * Output section.
 *
 * Words handed to pack() are accumulated into one line image at a time.
 * "outbuf" holds that partial line, leading blanks included, and "outp"
 * is where the next character goes.  While outp is NOSTR no line has
 * been started.  Leading blanks are turned back into tabs only when the
 * finished line is written out by tabulate().
 */
char	*outp;				/* Next free slot in outbuf, or NOSTR */
char	outbuf[BUFSIZ];			/* Image of the line being built */
370 
371 /*
372  * Initialize the output section.
373  */
374 setout()
375 {
376 	outp = NOSTR;
377 }
378 
379 /*
380  * Pack a word onto the output line.  If this is the beginning of
381  * the line, push on the appropriately-sized string of blanks first.
382  * If the word won't fit on the current line, flush and begin a new
383  * line.  If the word is too long to fit all by itself on a line,
384  * just give it its own and hope for the best.
385  *
386  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
387  *	goal length, take it.  If not, then check to see if the line
388  *	will be over the max length; if so put the word on the next
389  *	line.  If not, check to see if the line will be closer to the
390  *	goal length with or without the word and take it or put it on
391  *	the next line accordingly.
392  */
393 
394 /*
395  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
396  * pack(word)
397  *	char word[];
398  */
399 pack(word,wl)
400 	char word[];
401 	int wl;
402 {
403 	register char *cp;
404 	register int s, t;
405 
406 	if (outp == NOSTR)
407 		leadin();
408 	/*
409 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
410 	 * length of the line before the word is added; t is now the length
411 	 * of the line after the word is added
412 	 *	t = strlen(word);
413 	 *	if (t+s <= LENGTH)
414 	 */
415 	s = outp - outbuf;
416 	t = wl + s;
417 	if ((t <= goal_length) ||
418 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
419 		/*
420 		 * In like flint!
421 		 */
422 		for (cp = word; *cp; *outp++ = *cp++);
423 		return;
424 	}
425 	if (s > pfx) {
426 		oflush();
427 		leadin();
428 	}
429 	for (cp = word; *cp; *outp++ = *cp++);
430 }
431 
432 /*
433  * If there is anything on the current output line, send it on
434  * its way.  Set outp to NOSTR to indicate the absence of the current
435  * line prefix.
436  */
437 oflush()
438 {
439 	if (outp == NOSTR)
440 		return;
441 	*outp = '\0';
442 	tabulate(outbuf);
443 	outp = NOSTR;
444 }
445 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * "line" is modified in place: its trailing blanks are cut off.  Each
 * full group of 8 leading blanks is written as one tab, the remaining
 * leading blanks as themselves, then the rest of the line and a
 * newline.  An empty or all-blank line produces just the newline.
 * The value returned is always 0; callers do not use it.
 */
int
tabulate(char line[])
{
	char *cp;
	size_t len;
	int b, t;

	/*
	 * Toss trailing blanks in the output line.  Working with a length
	 * avoids stepping a pointer to before the start of the buffer
	 * when the line is empty or all blanks.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
	return (0);
}
484 
485 /*
486  * Initialize the output line with the appropriate number of
487  * leading blanks.
488  */
489 leadin()
490 {
491 	register int b;
492 	register char *cp;
493 
494 	for (b = 0, cp = outbuf; b < pfx; b++)
495 		*cp++ = ' ';
496 	outp = cp;
497 }
498 
499 /*
500  * Save a string in dynamic space.
501  * This little goodie is needed for
502  * a headline detector in head.c
503  */
504 char *
505 savestr(str)
506 	char str[];
507 {
508 	register char *top;
509 
510 	top = malloc(strlen(str) + 1);
511 	if (top == NOSTR) {
512 		fprintf(stderr, "fmt:  Ran out of memory\n");
513 		exit(1);
514 	}
515 	strcpy(top, str);
516 	return (top);
517 }
518 
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, and 0 otherwise.  The empty string is a prefix
 * of everything.  Neither string is read beyond its terminating NUL:
 * when s2 is the shorter one, its NUL is the first mismatch.
 */
int
ispref(const char *s1, const char *s2)
{
	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
530