xref: /freebsd/usr.bin/fmt/fmt.c (revision d82e286489da73321a47e329d98a98817b0438b6)
1 /*
2  * Copyright (c) 1980, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
#ifndef lint
/*
 * Identification strings compiled into the object file.  Nothing in
 * the visible code refers to them.
 */
static char copyright[] =
	"@(#) Copyright (c) 1980, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";
static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
#endif /* not lint */
43 
44 #include <stdio.h>
45 #include <ctype.h>
46 #include <locale.h>
47 
48 /*
49  * fmt -- format the concatenation of input files or standard input
50  * onto standard output.  Designed for use with Mail ~|
51  *
52  * Syntax : fmt [ goal [ max ] ] [ name ... ]
53  * Authors: Kurt Shoens (UCB) 12/7/78;
54  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
55  */
56 
57 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
58  * #define	LENGTH	72		Max line length in output
59  */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/*
 * Line lengths used when the command line does not supply any
 * (goal and max lengths added by LIZ@UOM 6/18/85).
 */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75

/* Formatter state shared by the routines below. */
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */

char	*malloc();		/* for lint . . . */

/* Header field names looked for right after a head line; null-terminated. */
char	*headnames[] = {
	"To",
	"Subject",
	"Cc",
	0
};
73 
74 /*
75  * Drive the whole formatter by managing input files.  Also,
76  * cause initialization of the output stuff and flush it out
77  * at the end.
78  */
79 
80 main(argc, argv)
81 	int argc;
82 	char **argv;
83 {
84 	register FILE *fi;
85 	register int errs = 0;
86 	int number;		/* LIZ@UOM 6/18/85 */
87 
88 	(void) setlocale(LC_CTYPE, "");
89 
90 	goal_length = GOAL_LENGTH;
91 	max_length = MAX_LENGTH;
92 	setout();
93 	lineno = 1;
94 	mark = -10;
95 	/*
96 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
97 	 */
98 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
99 		argv++;
100 		argc--;
101 		goal_length = number;
102 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
103 			argv++;
104 			argc--;
105 			max_length = number;
106 		}
107 	}
108 	if (max_length <= goal_length) {
109 		fprintf(stderr, "Max length must be greater than %s\n",
110 			"goal length");
111 		exit(1);
112 	}
113 	if (argc < 2) {
114 		fmt(stdin);
115 		oflush();
116 		exit(0);
117 	}
118 	while (--argc) {
119 		if ((fi = fopen(*++argv, "r")) == NULL) {
120 			perror(*argv);
121 			errs++;
122 			continue;
123 		}
124 		fmt(fi);
125 		fclose(fi);
126 	}
127 	oflush();
128 	exit(errs);
129 }
130 
/*
 * Read the stream fi line by line and pass each cleaned-up line to
 * prefix().  Cleaning a line means:
 *   - applying backspaces (a ^H erases the character collected before it),
 *   - dropping any other character that is neither printable nor a tab,
 *   - discarding whatever does not fit in the BUFSIZ-byte line buffer,
 *   - expanding tabs to blanks on 8-column tab stops,
 *   - stripping trailing blanks.
 *
 * No meaningful value is returned; the function is int only to stay
 * compatible with the implicit declaration made by the call in main(),
 * which precedes this definition.
 */
int
fmt(FILE *fi)
{
	char linebuf[BUFSIZ], canonb[BUFSIZ];
	char *cp, *cp2, cc;
	int c, col;

	c = getc(fi);
	while (c != EOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Tabs are kept as they are for now.
		 */
		cp = linebuf;
		while (c != '\n' && c != EOF && cp - linebuf < BUFSIZ - 1) {
			if (c == '\b') {
				if (cp > linebuf)
					cp--;
			} else if (isprint(c) || c == '\t')
				*cp++ = c;
			c = getc(fi);
		}
		*cp = '\0';

		/*
		 * The buffer may have filled up before the end of the
		 * line: toss anything remaining on the input line.
		 */
		while (c != '\n' && c != EOF)
			c = getc(fi);

		/*
		 * Expand tabs on the way to canonb.  "col" keeps counting
		 * even once canonb is full so the loops always terminate.
		 */
		col = 0;
		cp2 = canonb;
		for (cp = linebuf; (cc = *cp) != '\0'; cp++) {
			if (cc != '\t') {
				col++;
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = cc;
				continue;
			}
			do {
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = ' ';
				col++;
			} while ((col & 07) != 0);
		}

		/*
		 * Swipe trailing blanks from the line.  cp2 is one past the
		 * last stored character and is only ever moved back as far
		 * as canonb itself, so no pointer before the start of the
		 * array is formed, even for an empty or all-blank line.
		 */
		while (cp2 > canonb && cp2[-1] == ' ')
			cp2--;
		*cp2 = '\0';
		prefix(canonb);

		/* Step over the newline that ended this line. */
		if (c != EOF)
			c = getc(fi);
	}
}
203 
204 /*
205  * Take a line devoid of tabs and other garbage and determine its
206  * blank prefix.  If the indent changes, call for a linebreak.
207  * If the input line is blank, echo the blank line on the output.
208  * Finally, if the line minus the prefix is a mail header, try to keep
209  * it on a line by itself.
210  */
211 prefix(line)
212 	char line[];
213 {
214 	register char *cp, **hp;
215 	register int np, h;
216 
217 	if (!*line) {
218 		oflush();
219 		putchar('\n');
220 		return;
221 	}
222 	for (cp = line; *cp == ' '; cp++)
223 		;
224 	np = cp - line;
225 
226 	/*
227 	 * The following horrible expression attempts to avoid linebreaks
228 	 * when the indent changes due to a paragraph.
229 	 */
230 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
231 		oflush();
232 	if (h = ishead(cp))
233 		oflush(), mark = lineno;
234 	if (lineno - mark < 3 && lineno - mark > 0)
235 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
236 			if (ispref(*hp, cp)) {
237 				h = 1;
238 				oflush();
239 				break;
240 			}
241 	if (!h && (h = (*cp == '.')))
242 		oflush();
243 	pfx = np;
244 	if (h)
245 		pack(cp, strlen(cp));
246 	else	split(cp);
247 	if (h)
248 		oflush();
249 	lineno++;
250 }
251 
/*
 * Split up the passed line into output "words" and hand them to
 * pack() one at a time.  A word is a maximal string of non-blanks
 * (a backslash followed by white space keeps that white space inside
 * the word) together with the blanks that follow it.  The last word
 * of the line gets one blank appended, or two when it ends in one of
 * the characters ".:!".
 *
 * The length given to pack() counts only the characters of the word
 * proper (an escaped pair counts as one), not the trailing blanks.
 *
 * No meaningful value is returned; the function is int only to stay
 * compatible with the implicit declaration made by the call in
 * prefix(), which precedes this definition.
 */
int
split(char line[])
{
	/*
	 * fmt() passes down lines of at most BUFSIZ-1 characters; on top
	 * of those the word may receive two appended blanks and needs a
	 * terminating NUL.  Every store is bounded as well, so a longer
	 * line is truncated rather than written past the buffer.
	 */
	char word[BUFSIZ + 2];
	char *const wend = word + sizeof(word) - 1;	/* slot kept for NUL */
	char *cp, *cp2;
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  The cast keeps the isspace() argument within
		 * the range the ctype functions are defined for.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char) cp[1])) {
				if (cp2 < wend)
					*cp2++ = *cp;
				cp++;
			}
			if (cp2 < wend)
				*cp2++ = *cp;
			cp++;
			wordl++;
		}

		/*
		 * Guarantee a space at end of line.  Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0') {
			if (cp2 < wend)
				*cp2++ = ' ';
			if (index(".:!", cp[-1]) && cp2 < wend)
				*cp2++ = ' ';
		}

		/* Carry the blanks that separate this word from the next. */
		while (*cp == ' ') {
			if (cp2 < wend)
				*cp2++ = ' ';
			cp++;
		}
		*cp2 = '\0';
		pack(word, wordl);
	}
}
299 
300 /*
301  * Output section.
302  * Build up line images from the words passed in.  Prefix
303  * each line with correct number of blanks.  The buffer "outbuf"
304  * contains the current partial line image, including prefixed blanks.
305  * "outp" points to the next available space therein.  When outp is NOSTR,
306  * there ain't nothing in there yet.  At the bottom of this whole mess,
307  * leading tabs are reinserted.
308  */
309 char	outbuf[BUFSIZ];			/* Sandbagged output line image */
310 char	*outp;				/* Pointer in above */
311 
312 /*
313  * Initialize the output section.
314  */
315 setout()
316 {
317 	outp = NOSTR;
318 }
319 
320 /*
321  * Pack a word onto the output line.  If this is the beginning of
322  * the line, push on the appropriately-sized string of blanks first.
323  * If the word won't fit on the current line, flush and begin a new
324  * line.  If the word is too long to fit all by itself on a line,
325  * just give it its own and hope for the best.
326  *
327  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
328  *	goal length, take it.  If not, then check to see if the line
329  *	will be over the max length; if so put the word on the next
330  *	line.  If not, check to see if the line will be closer to the
331  *	goal length with or without the word and take it or put it on
332  *	the next line accordingly.
333  */
334 
335 /*
336  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
337  * pack(word)
338  *	char word[];
339  */
340 pack(word,wl)
341 	char word[];
342 	int wl;
343 {
344 	register char *cp;
345 	register int s, t;
346 
347 	if (outp == NOSTR)
348 		leadin();
349 	/*
350 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
351 	 * length of the line before the word is added; t is now the length
352 	 * of the line after the word is added
353 	 *	t = strlen(word);
354 	 *	if (t+s <= LENGTH)
355 	 */
356 	s = outp - outbuf;
357 	t = wl + s;
358 	if ((t <= goal_length) ||
359 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
360 		/*
361 		 * In like flint!
362 		 */
363 		for (cp = word; *cp; *outp++ = *cp++);
364 		return;
365 	}
366 	if (s > pfx) {
367 		oflush();
368 		leadin();
369 	}
370 	for (cp = word; *cp; *outp++ = *cp++);
371 }
372 
373 /*
374  * If there is anything on the current output line, send it on
375  * its way.  Set outp to NOSTR to indicate the absence of the current
376  * line prefix.
377  */
378 oflush()
379 {
380 	if (outp == NOSTR)
381 		return;
382 	*outp = '\0';
383 	tabulate(outbuf);
384 	outp = NOSTR;
385 }
386 
/*
 * Write one finished line to standard output, followed by a newline.
 *
 * Trailing blanks are removed first; this is done in place, so the
 * caller's buffer is shortened.  Each full group of 8 leading blanks
 * is then written as a tab, the remaining leading blanks as blanks,
 * and the rest of the line unchanged.
 *
 * No meaningful value is returned; the function is int only to stay
 * compatible with the implicit declaration made by the call in
 * oflush(), which precedes this definition.
 */
int
tabulate(char line[])
{
	char *cp;
	int b, t;

	/*
	 * Toss trailing blanks in the output line.  cp starts at the
	 * terminating NUL and never moves before "line", so an empty or
	 * all-blank line is handled without stepping outside the buffer.
	 */
	cp = line + strlen(line);
	while (cp > line && cp[-1] == ' ')
		cp--;
	*cp = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
}
425 
426 /*
427  * Initialize the output line with the appropriate number of
428  * leading blanks.
429  */
430 leadin()
431 {
432 	register int b;
433 	register char *cp;
434 
435 	for (b = 0, cp = outbuf; b < pfx; b++)
436 		*cp++ = ' ';
437 	outp = cp;
438 }
439 
/*
 * Return a copy of str held in freshly malloc'ed storage.
 * Never returns a null pointer: when no memory can be had, a message
 * is printed on standard error and the program exits with status 1.
 * (The original note says this is needed by a headline detector in
 * head.c, which is not part of the visible code.)
 */
char *
savestr(char str[])
{
	char *copy;

	copy = malloc(strlen(str) + 1);
	if (copy == NULL) {
		fprintf(stderr, "fmt:  Ran out of memory\n");
		exit(1);
	}
	strcpy(copy, str);
	return (copy);
}
459 
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2 (so the empty string is a prefix of anything),
 * and 0 otherwise.  Both strings are walked in step, and the walk
 * stops at the first mismatch, which includes reaching the end of s2
 * first, so neither string is read past its terminating NUL.
 */
int
ispref(const char *s1, const char *s2)
{
	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
471