xref: /freebsd/usr.bin/fmt/fmt.c (revision 48991a368427cadb9cdac39581d1676c29619c52)
1 /*
2  * Copyright (c) 1980, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 static char copyright[] =
36 "@(#) Copyright (c) 1980, 1993\n\
37 	The Regents of the University of California.  All rights reserved.\n";
38 #endif /* not lint */
39 
40 #ifndef lint
41 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
42 #endif /* not lint */
43 
#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
48 
49 /*
50  * fmt -- format the concatenation of input files or standard input
51  * onto standard output.  Designed for use with Mail ~|
52  *
53  * Syntax : fmt [ goal [ max ] ] [ name ... ]
54  * Authors: Kurt Shoens (UCB) 12/7/78;
55  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
56  */
57 
58 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
59  * #define	LENGTH	72		Max line length in output
60  */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/* LIZ@UOM 6/18/85 -- New variables goal_length and max_length */
#define GOAL_LENGTH 65		/* Default goal line length */
#define MAX_LENGTH 75		/* Default maximum line length */
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */

/*
 * malloc() is declared by <stdlib.h>; a private "char *malloc();"
 * declaration conflicts with that prototype and must not appear here.
 */

/*
 * Header names that prefix() looks for on the lines right after a
 * head line.  The list is terminated by a null pointer.
 */
char	*headnames[] = {"To", "Subject", "Cc", 0};

/*
 * Prototypes for the functions defined in this file, so that calls
 * appearing before the definitions are checked by the compiler.
 * Apart from savestr() they are declared int to match their
 * definitions; none of them returns a meaningful value except ispref().
 */
int	fmt(FILE *);
int	prefix(char *);
int	split(char *);
int	setout(void);
int	pack(char *, int);
int	oflush(void);
int	tabulate(char *);
int	leadin(void);
char	*savestr(char *);
int	ispref(char *, char *);
74 
75 /*
76  * Drive the whole formatter by managing input files.  Also,
77  * cause initialization of the output stuff and flush it out
78  * at the end.
79  */
80 
81 main(argc, argv)
82 	int argc;
83 	char **argv;
84 {
85 	register FILE *fi;
86 	register int errs = 0;
87 	int number;		/* LIZ@UOM 6/18/85 */
88 
89 	(void) setlocale(LC_CTYPE, "");
90 
91 	goal_length = GOAL_LENGTH;
92 	max_length = MAX_LENGTH;
93 	setout();
94 	lineno = 1;
95 	mark = -10;
96 	/*
97 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
98 	 */
99 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
100 		argv++;
101 		argc--;
102 		goal_length = number;
103 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
104 			argv++;
105 			argc--;
106 			max_length = number;
107 		}
108 	}
109 	if (max_length <= goal_length) {
110 		fprintf(stderr, "Max length must be greater than %s\n",
111 			"goal length");
112 		exit(1);
113 	}
114 	if (argc < 2) {
115 		fmt(stdin);
116 		oflush();
117 		exit(0);
118 	}
119 	while (--argc) {
120 		if ((fi = fopen(*++argv, "r")) == NULL) {
121 			perror(*argv);
122 			errs++;
123 			continue;
124 		}
125 		fmt(fi);
126 		fclose(fi);
127 	}
128 	oflush();
129 	exit(errs);
130 }
131 
/*
 * Read up characters from the passed input file, forming lines,
 * doing ^H processing, expanding tabs, stripping trailing blanks,
 * and sending each line down to prefix() for analysis.
 *
 * Characters that are neither printable nor a tab are dropped.
 * The two line buffers are static and grow on demand, so lines of
 * any length are accepted; running out of memory aborts.
 */
#define CHUNKSIZE 1024		/* Growth step for the line buffers */

/*
 * Return buf, grown in CHUNKSIZE steps if necessary so that it holds
 * at least `need' bytes.  *sizep is the current allocation size and is
 * updated.  buf may be a null pointer (nothing allocated yet).
 */
static char *
fmt_reserve(char *buf, size_t *sizep, size_t need)
{
	if (need <= *sizep)
		return (buf);
	while (*sizep < need)
		*sizep += CHUNKSIZE;
	buf = realloc(buf, *sizep);
	if (buf == NULL)
		abort();
	return (buf);
}

int
fmt(FILE *fi)
{
	static char *linebuf = NULL, *canonb = NULL;
	static size_t lbufsize = 0, cbufsize = 0;
	size_t len, clen, i;
	int c;

	c = getc(fi);
	while (c != EOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Leave tabs for now.
		 */
		len = 0;
		while (c != '\n' && c != EOF) {
			if (c == '\b') {
				if (len > 0)
					len--;
			} else if (isprint(c) || c == '\t') {
				/* room for this character and a terminator */
				linebuf = fmt_reserve(linebuf, &lbufsize,
				    len + 2);
				linebuf[len++] = c;
			}
			c = getc(fi);
		}
		/* Also covers an empty line before anything was allocated. */
		linebuf = fmt_reserve(linebuf, &lbufsize, len + 1);
		linebuf[len] = '\0';

		/*
		 * Expand tabs on the way to canonb.  Every byte written
		 * occupies one column, so clen doubles as the column.
		 */
		clen = 0;
		for (i = 0; i < len; i++) {
			if (linebuf[i] != '\t') {
				canonb = fmt_reserve(canonb, &cbufsize,
				    clen + 2);
				canonb[clen++] = linebuf[i];
				continue;
			}
			/* At least one blank, then on to a multiple of 8. */
			do {
				canonb = fmt_reserve(canonb, &cbufsize,
				    clen + 2);
				canonb[clen++] = ' ';
			} while ((clen & 07) != 0);
		}

		/*
		 * Swipe trailing blanks from the line.
		 */
		while (clen > 0 && canonb[clen - 1] == ' ')
			clen--;
		canonb = fmt_reserve(canonb, &cbufsize, clen + 1);
		canonb[clen] = '\0';

		prefix(canonb);

		/* Step over the newline that ended this line. */
		if (c != EOF)
			c = getc(fi);
	}
	return (0);
}
228 
229 /*
230  * Take a line devoid of tabs and other garbage and determine its
231  * blank prefix.  If the indent changes, call for a linebreak.
232  * If the input line is blank, echo the blank line on the output.
233  * Finally, if the line minus the prefix is a mail header, try to keep
234  * it on a line by itself.
235  */
236 prefix(line)
237 	char line[];
238 {
239 	register char *cp, **hp;
240 	register int np, h;
241 
242 	if (!*line) {
243 		oflush();
244 		putchar('\n');
245 		return;
246 	}
247 	for (cp = line; *cp == ' '; cp++)
248 		;
249 	np = cp - line;
250 
251 	/*
252 	 * The following horrible expression attempts to avoid linebreaks
253 	 * when the indent changes due to a paragraph.
254 	 */
255 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
256 		oflush();
257 	if (h = ishead(cp))
258 		oflush(), mark = lineno;
259 	if (lineno - mark < 3 && lineno - mark > 0)
260 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
261 			if (ispref(*hp, cp)) {
262 				h = 1;
263 				oflush();
264 				break;
265 			}
266 	if (!h && (h = (*cp == '.')))
267 		oflush();
268 	pfx = np;
269 	if (h)
270 		pack(cp, strlen(cp));
271 	else	split(cp);
272 	if (h)
273 		oflush();
274 	lineno++;
275 }
276 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer, together with the word length not counting the
 * attached blanks.
 *
 * The word buffer is static and is grown to the size of the line, so
 * a word (or a run of blanks) of any length is handled; running out
 * of memory aborts.
 */
int
split(char line[])
{
	static char *word = NULL;
	static size_t wordsize = 0;
	size_t need;
	char *cp, *cp2;
	int wordl;		/* LIZ@UOM 6/18/85 */

	/*
	 * A word never holds more than the bytes of the line itself
	 * plus two added blanks and the terminator.
	 */
	need = strlen(line) + 3;
	if (need > wordsize) {
		word = realloc(word, need);
		if (word == NULL)
			abort();
		wordsize = need;
	}

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash followed by white space is copied
		 * together with it and counted as a single character.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
	return (0);
}
324 
325 /*
326  * Output section.
327  * Build up line images from the words passed in.  Prefix
328  * each line with correct number of blanks.  The buffer "outbuf"
329  * contains the current partial line image, including prefixed blanks.
330  * "outp" points to the next available space therein.  When outp is NOSTR,
331  * there ain't nothing in there yet.  At the bottom of this whole mess,
332  * leading tabs are reinserted.
333  */
334 char	outbuf[BUFSIZ];			/* Sandbagged output line image */
335 char	*outp;				/* Pointer in above */
336 
337 /*
338  * Initialize the output section.
339  */
340 setout()
341 {
342 	outp = NOSTR;
343 }
344 
345 /*
346  * Pack a word onto the output line.  If this is the beginning of
347  * the line, push on the appropriately-sized string of blanks first.
348  * If the word won't fit on the current line, flush and begin a new
349  * line.  If the word is too long to fit all by itself on a line,
350  * just give it its own and hope for the best.
351  *
352  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
353  *	goal length, take it.  If not, then check to see if the line
354  *	will be over the max length; if so put the word on the next
355  *	line.  If not, check to see if the line will be closer to the
356  *	goal length with or without the word and take it or put it on
357  *	the next line accordingly.
358  */
359 
360 /*
361  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
362  * pack(word)
363  *	char word[];
364  */
365 pack(word,wl)
366 	char word[];
367 	int wl;
368 {
369 	register char *cp;
370 	register int s, t;
371 
372 	if (outp == NOSTR)
373 		leadin();
374 	/*
375 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
376 	 * length of the line before the word is added; t is now the length
377 	 * of the line after the word is added
378 	 *	t = strlen(word);
379 	 *	if (t+s <= LENGTH)
380 	 */
381 	s = outp - outbuf;
382 	t = wl + s;
383 	if ((t <= goal_length) ||
384 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
385 		/*
386 		 * In like flint!
387 		 */
388 		for (cp = word; *cp; *outp++ = *cp++);
389 		return;
390 	}
391 	if (s > pfx) {
392 		oflush();
393 		leadin();
394 	}
395 	for (cp = word; *cp; *outp++ = *cp++);
396 }
397 
398 /*
399  * If there is anything on the current output line, send it on
400  * its way.  Set outp to NOSTR to indicate the absence of the current
401  * line prefix.
402  */
403 oflush()
404 {
405 	if (outp == NOSTR)
406 		return;
407 	*outp = '\0';
408 	tabulate(outbuf);
409 	outp = NOSTR;
410 }
411 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally), followed by a newline.
 *
 * The buffer is modified: trailing blanks are cut off in place.
 * Every 8 leading blanks are written as one tab; the remaining
 * leading blanks are written as they are.  The return value carries
 * no meaning.
 */
int
tabulate(char line[])
{
	size_t len, blanks, n;
	char *cp;

	/*
	 * Toss trailing blanks in the output line.  Working with a
	 * length keeps us inside the buffer for empty and all-blank
	 * lines.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	blanks = cp - line;
	for (n = blanks >> 3; n > 0; n--)
		putc('\t', stdout);
	for (n = blanks & 07; n > 0; n--)
		putc(' ', stdout);
	fputs(cp, stdout);
	putc('\n', stdout);
	return (0);
}
450 
451 /*
452  * Initialize the output line with the appropriate number of
453  * leading blanks.
454  */
455 leadin()
456 {
457 	register int b;
458 	register char *cp;
459 
460 	for (b = 0, cp = outbuf; b < pfx; b++)
461 		*cp++ = ' ';
462 	outp = cp;
463 }
464 
465 /*
466  * Save a string in dynamic space.
467  * This little goodie is needed for
468  * a headline detector in head.c
469  */
470 char *
471 savestr(str)
472 	char str[];
473 {
474 	register char *top;
475 
476 	top = malloc(strlen(str) + 1);
477 	if (top == NOSTR) {
478 		fprintf(stderr, "fmt:  Ran out of memory\n");
479 		exit(1);
480 	}
481 	strcpy(top, str);
482 	return (top);
483 }
484 
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 appears, in order, at the
 * start of s2 (so an empty s1 is a prefix of anything), 0 otherwise.
 * Neither string is modified.
 */
int
ispref(char *s1, char *s2)
{
	/* Walk both strings together until s1 ends or they differ. */
	while (*s1 != '\0' && *s1 == *s2) {
		s1++;
		s2++;
	}
	return (*s1 == '\0');
}
496