xref: /titanic_44/usr/src/lib/libcmd/common/fmt.c (revision 4a6ec905b96eb96a398c346f59e034a90ce8ad37)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1992-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                                                                      *
20 ***********************************************************************/
21 #pragma prototyped
22 
23 static const char usage[] =
24 "[-?\n@(#)$Id: fmt (AT&T Research) 2007-01-02 $\n]"
25 USAGE_LICENSE
26 "[+NAME?fmt - simple text formatter]"
27 "[+DESCRIPTION?\bfmt\b reads the input files and left justifies space "
28     "separated words into lines \awidth\a characters or less in length and "
29     "writes the lines to the standard output. The standard input is read if "
30     "\b-\b or no files are specified. Blank lines and interword spacing are "
31     "preserved in the output. Indentation is preserved, and lines with "
32     "identical indentation are joined and justified.]"
33 "[+?\bfmt\b is meant to format mail messages prior to sending, but may "
34     "also be useful for other simple tasks. For example, in \bvi\b(1) the "
35     "command \b:!}fmt\b will justify the lines in the current paragraph.]"
36 "[c:crown-margin?Preserve the indentation of the first two lines within "
37     "a paragraph, and align the left margin of each subsequent line with "
38     "that of the second line.]"
39 "[o:optget?Format concatenated \boptget\b(3) usage strings.]"
40 "[s:split-only?Split lines only; do not join short lines to form longer "
41     "ones.]"
42 "[u:uniform-spacing?One space between words, two after sentences.]"
43 "[w:width?Set the output line width to \acolumns\a.]#[columns:=72]"
44     "\n\n"
45 "[ file ... ]"
46     "\n\n"
47 "[+SEE ALSO?\bmailx\b(1), \bnroff\b(1), \btroff\b(1), \bvi\b(1), "
48     "\boptget\b(3)]"
49 ;
50 
51 #include <cmd.h>
52 #include <ctype.h>
53 
54 typedef struct Fmt_s
55 {
56 	long	flags;
57 	char*	outp;
58 	char*	outbuf;
59 	char*	endbuf;
60 	Sfio_t*	in;
61 	Sfio_t*	out;
62 	int	indent;
63 	int	nextdent;
64 	int	nwords;
65 	int	prefix;
66 	int	quote;
67 	int	retain;
68 	int	section;
69 } Fmt_t;
70 
/* indentation step used for 'o' (optget) output and the tab stop width */
#define INDENT		4
#define TABSZ		8

/*
 * Option letters are kept as bits in the flags member: letter c maps to
 * bit (c - 'a').
 */
#define FMT_BIT(c)	(1L<<((c)-'a'))

#define isoption(fp,c)	((fp)->flags&FMT_BIT(c))
#define setoption(fp,c)	((fp)->flags|=FMT_BIT(c))
#define clroption(fp,c)	((fp)->flags&=~FMT_BIT(c))
77 
78 static void
79 outline(Fmt_t* fp)
80 {
81 	register char*	cp = fp->outbuf;
82 	int		n = 0;
83 	int		c;
84 	int		d;
85 
86 	if (!fp->outp)
87 		return;
88 	while (fp->outp[-1] == ' ')
89 		fp->outp--;
90 	*fp->outp = 0;
91 	while (*cp++ == ' ')
92 		n++;
93 	if (n >= TABSZ)
94 	{
95 		n /= TABSZ;
96 		cp = &fp->outbuf[TABSZ*n];
97 		while (n--)
98 			*--cp = '\t';
99 	}
100 	else
101 		cp = fp->outbuf;
102 	fp->nwords = 0;
103 	if (!isoption(fp, 'o'))
104 		sfputr(fp->out, cp, '\n');
105 	else if (*cp)
106 	{
107 		n = fp->indent;
108 		if (*cp != '[')
109 		{
110 			if (*cp == ' ')
111 				cp++;
112 			n += INDENT;
113 		}
114 		while (n--)
115 			sfputc(fp->out, ' ');
116 		if (fp->quote)
117 		{
118 			if ((d = (fp->outp - cp)) <= 0)
119 				c = 0;
120 			else if ((c = fp->outp[-1]) == 'n' && d > 1 && fp->outp[-2] == '\\')
121 				c = '}';
122 			sfprintf(fp->out, "\"%s%s\"\n", cp, c == ']' || c == '{' || c == '}' ? "" : " ");
123 		}
124 		else
125 			sfputr(fp->out, cp, '\n');
126 		if (fp->nextdent)
127 		{
128 			fp->indent += fp->nextdent;
129 			fp->endbuf -= fp->nextdent;
130 			fp->nextdent = 0;
131 		}
132 	}
133 	fp->outp = 0;
134 }
135 
136 static void
137 split(Fmt_t* fp, char* buf, int splice)
138 {
139 	register char*	cp;
140 	register char*	ep;
141 	register char*	qp;
142 	register int	c = 1;
143 	register int	q = 0;
144 	register int	n;
145 	int		prefix;
146 
147 	for (ep = buf; *ep == ' '; ep++);
148 	prefix = ep - buf;
149 
150 	/*
151 	 * preserve blank lines
152 	 */
153 
154 	if ((*ep == 0 || *buf == '.') && !isoption(fp, 'o'))
155 	{
156 		if (*ep)
157 			prefix = strlen(buf);
158 		outline(fp);
159 		strcpy(fp->outbuf, buf);
160 		fp->outp = fp->outbuf+prefix;
161 		outline(fp);
162 		return;
163 	}
164 	if (fp->prefix < prefix && !isoption(fp, 'c'))
165 		outline(fp);
166 	if (!fp->outp || prefix < fp->prefix)
167 		fp->prefix = prefix;
168 	while (c)
169 	{
170 		cp = ep;
171 		while (*ep == ' ')
172 			ep++;
173 		if (cp != ep && isoption(fp, 'u'))
174 			cp = ep-1;
175 		while (c = *ep)
176 		{
177 			if (c == ' ')
178 				break;
179 			ep++;
180 
181 			/*
182 			 * skip over \space
183 			 */
184 
185 			if (c == '\\' && *ep)
186 				ep++;
187 		}
188 		n = (ep-cp);
189 		if (n && isoption(fp, 'o'))
190 		{
191 			for (qp = cp; qp < ep; qp++)
192 				if (*qp == '\\')
193 					qp++;
194 				else if (*qp == '"')
195 					q = !q;
196 			if (*(ep-1) == '"')
197 				goto skip;
198 		}
199 		if (fp->nwords > 0 && &fp->outp[n] >= fp->endbuf && !fp->retain && !q)
200 			outline(fp);
201 	skip:
202 		if (fp->nwords == 0)
203 		{
204 			if (fp->prefix)
205 				memset(fp->outbuf, ' ', fp->prefix);
206 			fp->outp = &fp->outbuf[fp->prefix];
207 			while (*cp == ' ')
208 				cp++;
209 			n = (ep-cp);
210 		}
211 		memcpy(fp->outp, cp, n);
212 		fp->outp += n;
213 		fp->nwords++;
214 	}
215 	if (isoption(fp, 's') || *buf == 0)
216 		outline(fp);
217 	else if (fp->outp)
218 	{
219 		/*
220 		 * two spaces at ends of sentences
221 		 */
222 
223 		if (!isoption(fp, 'o') && strchr(".:!?", fp->outp[-1]))
224 			*fp->outp++ = ' ';
225 		if (!splice && !fp->retain && (!fp->quote || (fp->outp - fp->outbuf) < 2 || fp->outp[-2] != '\\' || fp->outp[-1] != 'n' && fp->outp[-1] != 't' && fp->outp[-1] != ' '))
226 			*fp->outp++ = ' ';
227 	}
228 }
229 
230 static int
231 dofmt(Fmt_t* fp)
232 {
233 	register int	c;
234 	int		b;
235 	int		x;
236 	int		splice;
237 	char*		cp;
238 	char*		dp;
239 	char*		ep;
240 	char*		lp;
241 	char*		tp;
242 	char		buf[8192];
243 
244 	cp = 0;
245 	while (cp || (cp = sfgetr(fp->in, '\n', 0)) && !(splice = 0) && (lp = cp + sfvalue(fp->in) - 1) || (cp = sfgetr(fp->in, '\n', SF_LASTR)) && (splice = 1) && (lp = cp + sfvalue(fp->in)))
246 	{
247 		if (isoption(fp, 'o'))
248 		{
249 			if (!isoption(fp, 'i'))
250 			{
251 				setoption(fp, 'i');
252 				b = 0;
253 				while (cp < lp)
254 				{
255 					if (*cp == ' ')
256 						b += 1;
257 					else if (*cp == '\t')
258 						b += INDENT;
259 					else
260 						break;
261 					cp++;
262 				}
263 				fp->indent = roundof(b, INDENT);
264 			}
265 			else
266 				while (cp < lp && (*cp == ' ' || *cp == '\t'))
267 					cp++;
268 			if (!isoption(fp, 'q') && cp < lp)
269 			{
270 				setoption(fp, 'q');
271 				if (*cp == '"')
272 				{
273 					ep = lp;
274 					while (--ep > cp)
275 						if (*ep == '"')
276 						{
277 							fp->quote = 1;
278 							break;
279 						}
280 						else if (*ep != ' ' && *ep != '\t')
281 							break;
282 				}
283 			}
284 		}
285 	again:
286 		dp = buf;
287 		ep = 0;
288 		for (b = 1;; b = 0)
289 		{
290 			if (cp >= lp)
291 			{
292 				cp = 0;
293 				break;
294 			}
295 			c = *cp++;
296 			if (isoption(fp, 'o'))
297 			{
298 				if (c == '\\')
299 				{
300 					x = 0;
301 					c = ' ';
302 					cp--;
303 					while (cp < lp)
304 					{
305 						if (*cp == '\\')
306 						{
307 							cp++;
308 							if ((lp - cp) < 1)
309 							{
310 								c = '\\';
311 								break;
312 							}
313 							if (*cp == 'n')
314 							{
315 								cp++;
316 								c = '\n';
317 								if ((lp - cp) > 2)
318 								{
319 									if (*cp == ']' || *cp == '@' && *(cp + 1) == '(')
320 									{
321 										*dp++ = '\\';
322 										*dp++ = 'n';
323 										c = *cp++;
324 										break;
325 									}
326 									if (*cp == '\\' && *(cp + 1) == 'n')
327 									{
328 										cp += 2;
329 										*dp++ = '\n';
330 										break;
331 									}
332 								}
333 							}
334 							else if (*cp == 't' || *cp == ' ')
335 							{
336 								cp++;
337 								x = 1;
338 								c = ' ';
339 							}
340 							else
341 							{
342 								if (x && dp != buf && *(dp - 1) != ' ')
343 									*dp++ = ' ';
344 								*dp++ = '\\';
345 								c = *cp++;
346 								break;
347 							}
348 						}
349 						else if (*cp == ' ' || *cp == '\t')
350 						{
351 							cp++;
352 							c = ' ';
353 							x = 1;
354 						}
355 						else
356 						{
357 							if (x && c != '\n' && dp != buf && *(dp - 1) != ' ')
358 								*dp++ = ' ';
359 							break;
360 						}
361 					}
362 					if (c == '\n')
363 					{
364 						c = 0;
365 						goto flush;
366 					}
367 					if (c == ' ' && (dp == buf || *(dp - 1) == ' '))
368 						continue;
369 				}
370 				else if (c == '"')
371 				{
372 					if (b || cp >= lp)
373 					{
374 						if (fp->quote)
375 							continue;
376 						fp->section = 0;
377 					}
378 				}
379 				else if (c == '\a')
380 				{
381 					*dp++ = '\\';
382 					c = 'a';
383 				}
384 				else if (c == '\b')
385 				{
386 					*dp++ = '\\';
387 					c = 'b';
388 				}
389 				else if (c == '\f')
390 				{
391 					*dp++ = '\\';
392 					c = 'f';
393 				}
394 				else if (c == '\v')
395 				{
396 					*dp++ = '\\';
397 					c = 'v';
398 				}
399 				else if (c == ']' && (cp >= lp || *cp != ':' && *cp != '#' && *cp != '!'))
400 				{
401 					if (cp < lp && *cp == ']')
402 					{
403 						cp++;
404 						*dp++ = c;
405 					}
406 					else
407 					{
408 						fp->section = 1;
409 						fp->retain = 0;
410 					flush:
411 						*dp++ = c;
412 						*dp = 0;
413 						split(fp, buf, 0);
414 						outline(fp);
415 						goto again;
416 					}
417 				}
418 				else if (fp->section)
419 				{
420 					if (c == '[')
421 					{
422 						if (b)
423 							fp->retain = 1;
424 						else
425 						{
426 							cp--;
427 							c = 0;
428 							goto flush;
429 						}
430 						fp->section = 0;
431 					}
432 					else if (c == '{')
433 					{
434 						x = 1;
435 						for (tp = cp; tp < lp; tp++)
436 						{
437 							if (*tp == '[' || *tp == '\n')
438 								break;
439 							if (*tp == ' ' || *tp == '\t' || *tp == '"')
440 								continue;
441 							if (*tp == '\\' && (lp - tp) > 1)
442 							{
443 								if (*++tp == 'n')
444 									break;
445 								if (*tp == 't' || *tp == '\n')
446 									continue;
447 							}
448 							x = 0;
449 							break;
450 						}
451 						if (x)
452 						{
453 							if (fp->endbuf > (fp->outbuf + fp->indent + 2*INDENT))
454 								fp->nextdent = 2*INDENT;
455 							goto flush;
456 						}
457 						else
458 							fp->section = 0;
459 					}
460 					else if (c == '}')
461 					{
462 						if (fp->indent && (b || *(cp - 2) != 'f'))
463 						{
464 							if (b)
465 							{
466 								fp->indent -= 2*INDENT;
467 								fp->endbuf += 2*INDENT;
468 							}
469 							else
470 							{
471 								cp--;
472 								c = 0;
473 							}
474 							goto flush;
475 						}
476 						else
477 							fp->section = 0;
478 					}
479 					else if (c == ' ' || c == '\t')
480 						continue;
481 					else
482 						fp->section = 0;
483 				}
484 				else if (c == '?' && (cp >= lp || *cp != '?'))
485 				{
486 					if (fp->retain)
487 					{
488 						cp--;
489 						while (cp < lp && *cp != ' ' && *cp != '\t' && *cp != ']' && dp < &buf[sizeof(buf)-3])
490 							*dp++ = *cp++;
491 						if (cp < lp && (*cp == ' ' || *cp == '\t'))
492 							*dp++ = *cp++;
493 						*dp = 0;
494 						split(fp, buf, 0);
495 						dp = buf;
496 						ep = 0;
497 						fp->retain = 0;
498 						if (fp->outp >= fp->endbuf)
499 							outline(fp);
500 						continue;
501 					}
502 				}
503 				else if (c == ' ' || c == '\t')
504 					for (c = ' '; *cp == ' ' || *cp == '\t'; cp++);
505 			}
506 			else if (c == '\b')
507 			{
508 				if (dp > buf)
509 				{
510 					dp--;
511 					if (ep)
512 						ep--;
513 				}
514 				continue;
515 			}
516 			else if (c == '\t')
517 			{
518 				/*
519 				 * expand tabs
520 				 */
521 
522 				if (!ep)
523 					ep = dp;
524 				c = isoption(fp, 'o') ? 1 : TABSZ - (dp - buf) % TABSZ;
525 				if (dp >= &buf[sizeof(buf) - c - 3])
526 				{
527 					cp--;
528 					break;
529 				}
530 				while (c-- > 0)
531 					*dp++ = ' ';
532 				continue;
533 			}
534 			else if (!isprint(c))
535 				continue;
536 			if (dp >= &buf[sizeof(buf) - 3])
537 			{
538 				tp = dp;
539 				while (--tp > buf)
540 					if (isspace(*tp))
541 					{
542 						cp -= dp - tp;
543 						dp = tp;
544 						break;
545 					}
546 				ep = 0;
547 				break;
548 			}
549 			if (c != ' ')
550 				ep = 0;
551 			else if (!ep)
552 				ep = dp;
553 			*dp++ = c;
554 		}
555 		if (ep)
556 			*ep = 0;
557 		else
558 			*dp = 0;
559 		split(fp, buf, splice);
560 	}
561 	return 0;
562 }
563 
564 int
565 b_fmt(int argc, char** argv, void *context)
566 {
567 	register int	n;
568 	char*		cp;
569 	Fmt_t		fmt;
570 	char		outbuf[8 * 1024];
571 
572 	fmt.flags = 0;
573 	fmt.out = sfstdout;
574 	fmt.outbuf = outbuf;
575 	fmt.outp = 0;
576 	fmt.endbuf = &outbuf[72];
577 	fmt.indent = 0;
578 	fmt.nextdent = 0;
579 	fmt.nwords = 0;
580 	fmt.prefix = 0;
581 	fmt.quote = 0;
582 	fmt.retain = 0;
583 	fmt.section = 1;
584 	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
585 	while (n = optget(argv, usage))
586 		switch (n)
587 		{
588 		case 'c':
589 		case 'o':
590 		case 's':
591 		case 'u':
592 			setoption(&fmt, n);
593 			break;
594 		case 'w':
595 			if (opt_info.num < TABSZ || opt_info.num>= sizeof(outbuf))
596 				error(2, "width out of range");
597 			fmt.endbuf = &outbuf[opt_info.num];
598 			break;
599 		case ':':
600 			error(2, "%s", opt_info.arg);
601 			break;
602 		case '?':
603 			error(ERROR_usage(2), "%s", opt_info.arg);
604 			break;
605 		}
606 	argv += opt_info.index;
607 	if (error_info.errors)
608 		error(ERROR_usage(2), "%s", optusage(NiL));
609 	if (isoption(&fmt, 'o'))
610 		setoption(&fmt, 'c');
611 	if (isoption(&fmt, 's'))
612 		clroption(&fmt, 'u');
613 	if (cp = *argv)
614 		argv++;
615 	do {
616 		if (!cp || streq(cp, "-"))
617 			fmt.in = sfstdin;
618 		else if (!(fmt.in = sfopen(NiL, cp, "r")))
619 		{
620 			error(ERROR_system(0), "%s: cannot open", cp);
621 			error_info.errors = 1;
622 			continue;
623 		}
624 		dofmt(&fmt);
625 		if (fmt.in != sfstdin)
626 			sfclose(fmt.in);
627 	} while (cp = *argv++);
628 	outline(&fmt);
629 	if (sfsync(sfstdout))
630 		error(ERROR_system(0), "write error");
631 	return error_info.errors != 0;
632 }
633