1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1992-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * *
20 ***********************************************************************/
21 #pragma prototyped
22
23 static const char usage[] =
24 "[-?\n@(#)$Id: fmt (AT&T Research) 2007-01-02 $\n]"
25 USAGE_LICENSE
26 "[+NAME?fmt - simple text formatter]"
27 "[+DESCRIPTION?\bfmt\b reads the input files and left justifies space "
28 "separated words into lines \awidth\a characters or less in length and "
29 "writes the lines to the standard output. The standard input is read if "
30 "\b-\b or no files are specified. Blank lines and interword spacing are "
31 "preserved in the output. Indentation is preserved, and lines with "
32 "identical indentation are joined and justified.]"
33 "[+?\bfmt\b is meant to format mail messages prior to sending, but may "
34 "also be useful for other simple tasks. For example, in \bvi\b(1) the "
35 "command \b:!}fmt\b will justify the lines in the current paragraph.]"
36 "[c:crown-margin?Preserve the indentation of the first two lines within "
37 "a paragraph, and align the left margin of each subsequent line with "
38 "that of the second line.]"
39 "[o:optget?Format concatenated \boptget\b(3) usage strings.]"
40 "[s:split-only?Split lines only; do not join short lines to form longer "
41 "ones.]"
42 "[u:uniform-spacing?One space between words, two after sentences.]"
43 "[w:width?Set the output line width to \acolumns\a.]#[columns:=72]"
44 "\n\n"
45 "[ file ... ]"
46 "\n\n"
47 "[+SEE ALSO?\bmailx\b(1), \bnroff\b(1), \btroff\b(1), \bvi\b(1), "
48 "\boptget\b(3)]"
49 ;
50
51 #include <cmd.h>
52 #include <ctype.h>
53
54 typedef struct Fmt_s
55 {
56 long flags;
57 char* outp;
58 char* outbuf;
59 char* endbuf;
60 Sfio_t* in;
61 Sfio_t* out;
62 int indent;
63 int nextdent;
64 int nwords;
65 int prefix;
66 int quote;
67 int retain;
68 int section;
69 } Fmt_t;
70
#define INDENT		4	/* optget mode indentation unit	*/
#define TABSZ		8	/* distance between tab stops	*/

/*
 * options are kept as single bits of the flags member, one bit per
 * lower case option letter: 'a' is bit 0, 'b' is bit 1, ...
 */

#define optionbit(c)		(1L<<((c)-'a'))

#define isoption(fp,c)		((fp)->flags&optionbit(c))
#define setoption(fp,c)		((fp)->flags|=optionbit(c))
#define clroption(fp,c)		((fp)->flags&=~optionbit(c))
77
78 static void
outline(Fmt_t * fp)79 outline(Fmt_t* fp)
80 {
81 register char* cp = fp->outbuf;
82 int n = 0;
83 int c;
84 int d;
85
86 if (!fp->outp)
87 return;
88 while (fp->outp[-1] == ' ')
89 fp->outp--;
90 *fp->outp = 0;
91 while (*cp++ == ' ')
92 n++;
93 if (n >= TABSZ)
94 {
95 n /= TABSZ;
96 cp = &fp->outbuf[TABSZ*n];
97 while (n--)
98 *--cp = '\t';
99 }
100 else
101 cp = fp->outbuf;
102 fp->nwords = 0;
103 if (!isoption(fp, 'o'))
104 sfputr(fp->out, cp, '\n');
105 else if (*cp)
106 {
107 n = fp->indent;
108 if (*cp != '[')
109 {
110 if (*cp == ' ')
111 cp++;
112 n += INDENT;
113 }
114 while (n--)
115 sfputc(fp->out, ' ');
116 if (fp->quote)
117 {
118 if ((d = (fp->outp - cp)) <= 0)
119 c = 0;
120 else if ((c = fp->outp[-1]) == 'n' && d > 1 && fp->outp[-2] == '\\')
121 c = '}';
122 sfprintf(fp->out, "\"%s%s\"\n", cp, c == ']' || c == '{' || c == '}' ? "" : " ");
123 }
124 else
125 sfputr(fp->out, cp, '\n');
126 if (fp->nextdent)
127 {
128 fp->indent += fp->nextdent;
129 fp->endbuf -= fp->nextdent;
130 fp->nextdent = 0;
131 }
132 }
133 fp->outp = 0;
134 }
135
136 static void
split(Fmt_t * fp,char * buf,int splice)137 split(Fmt_t* fp, char* buf, int splice)
138 {
139 register char* cp;
140 register char* ep;
141 register char* qp;
142 register int c = 1;
143 register int q = 0;
144 register int n;
145 int prefix;
146
147 for (ep = buf; *ep == ' '; ep++);
148 prefix = ep - buf;
149
150 /*
151 * preserve blank lines
152 */
153
154 if ((*ep == 0 || *buf == '.') && !isoption(fp, 'o'))
155 {
156 if (*ep)
157 prefix = strlen(buf);
158 outline(fp);
159 strcpy(fp->outbuf, buf);
160 fp->outp = fp->outbuf+prefix;
161 outline(fp);
162 return;
163 }
164 if (fp->prefix < prefix && !isoption(fp, 'c'))
165 outline(fp);
166 if (!fp->outp || prefix < fp->prefix)
167 fp->prefix = prefix;
168 while (c)
169 {
170 cp = ep;
171 while (*ep == ' ')
172 ep++;
173 if (cp != ep && isoption(fp, 'u'))
174 cp = ep-1;
175 while (c = *ep)
176 {
177 if (c == ' ')
178 break;
179 ep++;
180
181 /*
182 * skip over \space
183 */
184
185 if (c == '\\' && *ep)
186 ep++;
187 }
188 n = (ep-cp);
189 if (n && isoption(fp, 'o'))
190 {
191 for (qp = cp; qp < ep; qp++)
192 if (*qp == '\\')
193 qp++;
194 else if (*qp == '"')
195 q = !q;
196 if (*(ep-1) == '"')
197 goto skip;
198 }
199 if (fp->nwords > 0 && &fp->outp[n] >= fp->endbuf && !fp->retain && !q)
200 outline(fp);
201 skip:
202 if (fp->nwords == 0)
203 {
204 if (fp->prefix)
205 memset(fp->outbuf, ' ', fp->prefix);
206 fp->outp = &fp->outbuf[fp->prefix];
207 while (*cp == ' ')
208 cp++;
209 n = (ep-cp);
210 }
211 memcpy(fp->outp, cp, n);
212 fp->outp += n;
213 fp->nwords++;
214 }
215 if (isoption(fp, 's') || *buf == 0)
216 outline(fp);
217 else if (fp->outp)
218 {
219 /*
220 * two spaces at ends of sentences
221 */
222
223 if (!isoption(fp, 'o') && strchr(".:!?", fp->outp[-1]))
224 *fp->outp++ = ' ';
225 if (!splice && !fp->retain && (!fp->quote || (fp->outp - fp->outbuf) < 2 || fp->outp[-2] != '\\' || fp->outp[-1] != 'n' && fp->outp[-1] != 't' && fp->outp[-1] != ' '))
226 *fp->outp++ = ' ';
227 }
228 }
229
230 static int
dofmt(Fmt_t * fp)231 dofmt(Fmt_t* fp)
232 {
233 register int c;
234 int b;
235 int x;
236 int splice;
237 char* cp;
238 char* dp;
239 char* ep;
240 char* lp;
241 char* tp;
242 char buf[8192];
243
244 cp = 0;
245 while (cp || (cp = sfgetr(fp->in, '\n', 0)) && !(splice = 0) && (lp = cp + sfvalue(fp->in) - 1) || (cp = sfgetr(fp->in, '\n', SF_LASTR)) && (splice = 1) && (lp = cp + sfvalue(fp->in)))
246 {
247 if (isoption(fp, 'o'))
248 {
249 if (!isoption(fp, 'i'))
250 {
251 setoption(fp, 'i');
252 b = 0;
253 while (cp < lp)
254 {
255 if (*cp == ' ')
256 b += 1;
257 else if (*cp == '\t')
258 b += INDENT;
259 else
260 break;
261 cp++;
262 }
263 fp->indent = roundof(b, INDENT);
264 }
265 else
266 while (cp < lp && (*cp == ' ' || *cp == '\t'))
267 cp++;
268 if (!isoption(fp, 'q') && cp < lp)
269 {
270 setoption(fp, 'q');
271 if (*cp == '"')
272 {
273 ep = lp;
274 while (--ep > cp)
275 if (*ep == '"')
276 {
277 fp->quote = 1;
278 break;
279 }
280 else if (*ep != ' ' && *ep != '\t')
281 break;
282 }
283 }
284 }
285 again:
286 dp = buf;
287 ep = 0;
288 for (b = 1;; b = 0)
289 {
290 if (cp >= lp)
291 {
292 cp = 0;
293 break;
294 }
295 c = *cp++;
296 if (isoption(fp, 'o'))
297 {
298 if (c == '\\')
299 {
300 x = 0;
301 c = ' ';
302 cp--;
303 while (cp < lp)
304 {
305 if (*cp == '\\')
306 {
307 cp++;
308 if ((lp - cp) < 1)
309 {
310 c = '\\';
311 break;
312 }
313 if (*cp == 'n')
314 {
315 cp++;
316 c = '\n';
317 if ((lp - cp) > 2)
318 {
319 if (*cp == ']' || *cp == '@' && *(cp + 1) == '(')
320 {
321 *dp++ = '\\';
322 *dp++ = 'n';
323 c = *cp++;
324 break;
325 }
326 if (*cp == '\\' && *(cp + 1) == 'n')
327 {
328 cp += 2;
329 *dp++ = '\n';
330 break;
331 }
332 }
333 }
334 else if (*cp == 't' || *cp == ' ')
335 {
336 cp++;
337 x = 1;
338 c = ' ';
339 }
340 else
341 {
342 if (x && dp != buf && *(dp - 1) != ' ')
343 *dp++ = ' ';
344 *dp++ = '\\';
345 c = *cp++;
346 break;
347 }
348 }
349 else if (*cp == ' ' || *cp == '\t')
350 {
351 cp++;
352 c = ' ';
353 x = 1;
354 }
355 else
356 {
357 if (x && c != '\n' && dp != buf && *(dp - 1) != ' ')
358 *dp++ = ' ';
359 break;
360 }
361 }
362 if (c == '\n')
363 {
364 c = 0;
365 goto flush;
366 }
367 if (c == ' ' && (dp == buf || *(dp - 1) == ' '))
368 continue;
369 }
370 else if (c == '"')
371 {
372 if (b || cp >= lp)
373 {
374 if (fp->quote)
375 continue;
376 fp->section = 0;
377 }
378 }
379 else if (c == '\a')
380 {
381 *dp++ = '\\';
382 c = 'a';
383 }
384 else if (c == '\b')
385 {
386 *dp++ = '\\';
387 c = 'b';
388 }
389 else if (c == '\f')
390 {
391 *dp++ = '\\';
392 c = 'f';
393 }
394 else if (c == '\v')
395 {
396 *dp++ = '\\';
397 c = 'v';
398 }
399 else if (c == ']' && (cp >= lp || *cp != ':' && *cp != '#' && *cp != '!'))
400 {
401 if (cp < lp && *cp == ']')
402 {
403 cp++;
404 *dp++ = c;
405 }
406 else
407 {
408 fp->section = 1;
409 fp->retain = 0;
410 flush:
411 *dp++ = c;
412 *dp = 0;
413 split(fp, buf, 0);
414 outline(fp);
415 goto again;
416 }
417 }
418 else if (fp->section)
419 {
420 if (c == '[')
421 {
422 if (b)
423 fp->retain = 1;
424 else
425 {
426 cp--;
427 c = 0;
428 goto flush;
429 }
430 fp->section = 0;
431 }
432 else if (c == '{')
433 {
434 x = 1;
435 for (tp = cp; tp < lp; tp++)
436 {
437 if (*tp == '[' || *tp == '\n')
438 break;
439 if (*tp == ' ' || *tp == '\t' || *tp == '"')
440 continue;
441 if (*tp == '\\' && (lp - tp) > 1)
442 {
443 if (*++tp == 'n')
444 break;
445 if (*tp == 't' || *tp == '\n')
446 continue;
447 }
448 x = 0;
449 break;
450 }
451 if (x)
452 {
453 if (fp->endbuf > (fp->outbuf + fp->indent + 2*INDENT))
454 fp->nextdent = 2*INDENT;
455 goto flush;
456 }
457 else
458 fp->section = 0;
459 }
460 else if (c == '}')
461 {
462 if (fp->indent && (b || *(cp - 2) != 'f'))
463 {
464 if (b)
465 {
466 fp->indent -= 2*INDENT;
467 fp->endbuf += 2*INDENT;
468 }
469 else
470 {
471 cp--;
472 c = 0;
473 }
474 goto flush;
475 }
476 else
477 fp->section = 0;
478 }
479 else if (c == ' ' || c == '\t')
480 continue;
481 else
482 fp->section = 0;
483 }
484 else if (c == '?' && (cp >= lp || *cp != '?'))
485 {
486 if (fp->retain)
487 {
488 cp--;
489 while (cp < lp && *cp != ' ' && *cp != '\t' && *cp != ']' && dp < &buf[sizeof(buf)-3])
490 *dp++ = *cp++;
491 if (cp < lp && (*cp == ' ' || *cp == '\t'))
492 *dp++ = *cp++;
493 *dp = 0;
494 split(fp, buf, 0);
495 dp = buf;
496 ep = 0;
497 fp->retain = 0;
498 if (fp->outp >= fp->endbuf)
499 outline(fp);
500 continue;
501 }
502 }
503 else if (c == ' ' || c == '\t')
504 for (c = ' '; *cp == ' ' || *cp == '\t'; cp++);
505 }
506 else if (c == '\b')
507 {
508 if (dp > buf)
509 {
510 dp--;
511 if (ep)
512 ep--;
513 }
514 continue;
515 }
516 else if (c == '\t')
517 {
518 /*
519 * expand tabs
520 */
521
522 if (!ep)
523 ep = dp;
524 c = isoption(fp, 'o') ? 1 : TABSZ - (dp - buf) % TABSZ;
525 if (dp >= &buf[sizeof(buf) - c - 3])
526 {
527 cp--;
528 break;
529 }
530 while (c-- > 0)
531 *dp++ = ' ';
532 continue;
533 }
534 else if (!isprint(c))
535 continue;
536 if (dp >= &buf[sizeof(buf) - 3])
537 {
538 tp = dp;
539 while (--tp > buf)
540 if (isspace(*tp))
541 {
542 cp -= dp - tp;
543 dp = tp;
544 break;
545 }
546 ep = 0;
547 break;
548 }
549 if (c != ' ')
550 ep = 0;
551 else if (!ep)
552 ep = dp;
553 *dp++ = c;
554 }
555 if (ep)
556 *ep = 0;
557 else
558 *dp = 0;
559 split(fp, buf, splice);
560 }
561 return 0;
562 }
563
564 int
b_fmt(int argc,char ** argv,void * context)565 b_fmt(int argc, char** argv, void *context)
566 {
567 register int n;
568 char* cp;
569 Fmt_t fmt;
570 char outbuf[8 * 1024];
571
572 fmt.flags = 0;
573 fmt.out = sfstdout;
574 fmt.outbuf = outbuf;
575 fmt.outp = 0;
576 fmt.endbuf = &outbuf[72];
577 fmt.indent = 0;
578 fmt.nextdent = 0;
579 fmt.nwords = 0;
580 fmt.prefix = 0;
581 fmt.quote = 0;
582 fmt.retain = 0;
583 fmt.section = 1;
584 cmdinit(argc, argv, context, ERROR_CATALOG, 0);
585 while (n = optget(argv, usage))
586 switch (n)
587 {
588 case 'c':
589 case 'o':
590 case 's':
591 case 'u':
592 setoption(&fmt, n);
593 break;
594 case 'w':
595 if (opt_info.num < TABSZ || opt_info.num>= sizeof(outbuf))
596 error(2, "width out of range");
597 fmt.endbuf = &outbuf[opt_info.num];
598 break;
599 case ':':
600 error(2, "%s", opt_info.arg);
601 break;
602 case '?':
603 error(ERROR_usage(2), "%s", opt_info.arg);
604 break;
605 }
606 argv += opt_info.index;
607 if (error_info.errors)
608 error(ERROR_usage(2), "%s", optusage(NiL));
609 if (isoption(&fmt, 'o'))
610 setoption(&fmt, 'c');
611 if (isoption(&fmt, 's'))
612 clroption(&fmt, 'u');
613 if (cp = *argv)
614 argv++;
615 do {
616 if (!cp || streq(cp, "-"))
617 fmt.in = sfstdin;
618 else if (!(fmt.in = sfopen(NiL, cp, "r")))
619 {
620 error(ERROR_system(0), "%s: cannot open", cp);
621 error_info.errors = 1;
622 continue;
623 }
624 dofmt(&fmt);
625 if (fmt.in != sfstdin)
626 sfclose(fmt.in);
627 } while (cp = *argv++);
628 outline(&fmt);
629 if (sfsync(sfstdout))
630 error(ERROR_system(0), "write error");
631 return error_info.errors != 0;
632 }
633