1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1992-2007 AT&T Knowledge Ventures * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Knowledge Ventures * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * * 20 ***********************************************************************/ 21 #pragma prototyped 22 23 static const char usage[] = 24 "[-?\n@(#)$Id: fmt (AT&T Research) 2007-01-02 $\n]" 25 USAGE_LICENSE 26 "[+NAME?fmt - simple text formatter]" 27 "[+DESCRIPTION?\bfmt\b reads the input files and left justifies space " 28 "separated words into lines \awidth\a characters or less in length and " 29 "writes the lines to the standard output. The standard input is read if " 30 "\b-\b or no files are specified. Blank lines and interword spacing are " 31 "preserved in the output. Indentation is preserved, and lines with " 32 "identical indentation are joined and justified.]" 33 "[+?\bfmt\b is meant to format mail messages prior to sending, but may " 34 "also be useful for other simple tasks. For example, in \bvi\b(1) the " 35 "command \b:!}fmt\b will justify the lines in the current paragraph.]" 36 "[c:crown-margin?Preserve the indentation of the first two lines within " 37 "a paragraph, and align the left margin of each subsequent line with " 38 "that of the second line.]" 39 "[o:optget?Format concatenated \boptget\b(3) usage strings.]" 40 "[s:split-only?Split lines only; do not join short lines to form longer " 41 "ones.]" 42 "[u:uniform-spacing?One space between words, two after sentences.]" 43 "[w:width?Set the output line width to \acolumns\a.]#[columns:=72]" 44 "\n\n" 45 "[ file ... ]" 46 "\n\n" 47 "[+SEE ALSO?\bmailx\b(1), \bnroff\b(1), \btroff\b(1), \bvi\b(1), " 48 "\boptget\b(3)]" 49 ; 50 51 #include <cmd.h> 52 #include <ctype.h> 53 54 typedef struct Fmt_s 55 { 56 long flags; 57 char* outp; 58 char* outbuf; 59 char* endbuf; 60 Sfio_t* in; 61 Sfio_t* out; 62 int indent; 63 int nextdent; 64 int nwords; 65 int prefix; 66 int quote; 67 int retain; 68 int section; 69 } Fmt_t; 70 71 #define INDENT 4 72 #define TABSZ 8 73 74 #define isoption(fp,c) ((fp)->flags&(1L<<((c)-'a'))) 75 #define setoption(fp,c) ((fp)->flags|=(1L<<((c)-'a'))) 76 #define clroption(fp,c) ((fp)->flags&=~(1L<<((c)-'a'))) 77 78 static void 79 outline(Fmt_t* fp) 80 { 81 register char* cp = fp->outbuf; 82 int n = 0; 83 int c; 84 int d; 85 86 if (!fp->outp) 87 return; 88 while (fp->outp[-1] == ' ') 89 fp->outp--; 90 *fp->outp = 0; 91 while (*cp++ == ' ') 92 n++; 93 if (n >= TABSZ) 94 { 95 n /= TABSZ; 96 cp = &fp->outbuf[TABSZ*n]; 97 while (n--) 98 *--cp = '\t'; 99 } 100 else 101 cp = fp->outbuf; 102 fp->nwords = 0; 103 if (!isoption(fp, 'o')) 104 sfputr(fp->out, cp, '\n'); 105 else if (*cp) 106 { 107 n = fp->indent; 108 if (*cp != '[') 109 { 110 if (*cp == ' ') 111 cp++; 112 n += INDENT; 113 } 114 while (n--) 115 sfputc(fp->out, ' '); 116 if (fp->quote) 117 { 118 if ((d = (fp->outp - cp)) <= 0) 119 c = 0; 120 else if ((c = fp->outp[-1]) == 'n' && d > 1 && fp->outp[-2] == '\\') 121 c = '}'; 122 sfprintf(fp->out, "\"%s%s\"\n", cp, c == ']' || c == '{' || c == '}' ? "" : " "); 123 } 124 else 125 sfputr(fp->out, cp, '\n'); 126 if (fp->nextdent) 127 { 128 fp->indent += fp->nextdent; 129 fp->endbuf -= fp->nextdent; 130 fp->nextdent = 0; 131 } 132 } 133 fp->outp = 0; 134 } 135 136 static void 137 split(Fmt_t* fp, char* buf, int splice) 138 { 139 register char* cp; 140 register char* ep; 141 register char* qp; 142 register int c = 1; 143 register int q = 0; 144 register int n; 145 int prefix; 146 147 for (ep = buf; *ep == ' '; ep++); 148 prefix = ep - buf; 149 150 /* 151 * preserve blank lines 152 */ 153 154 if ((*ep == 0 || *buf == '.') && !isoption(fp, 'o')) 155 { 156 if (*ep) 157 prefix = strlen(buf); 158 outline(fp); 159 strcpy(fp->outbuf, buf); 160 fp->outp = fp->outbuf+prefix; 161 outline(fp); 162 return; 163 } 164 if (fp->prefix < prefix && !isoption(fp, 'c')) 165 outline(fp); 166 if (!fp->outp || prefix < fp->prefix) 167 fp->prefix = prefix; 168 while (c) 169 { 170 cp = ep; 171 while (*ep == ' ') 172 ep++; 173 if (cp != ep && isoption(fp, 'u')) 174 cp = ep-1; 175 while (c = *ep) 176 { 177 if (c == ' ') 178 break; 179 ep++; 180 181 /* 182 * skip over \space 183 */ 184 185 if (c == '\\' && *ep) 186 ep++; 187 } 188 n = (ep-cp); 189 if (n && isoption(fp, 'o')) 190 { 191 for (qp = cp; qp < ep; qp++) 192 if (*qp == '\\') 193 qp++; 194 else if (*qp == '"') 195 q = !q; 196 if (*(ep-1) == '"') 197 goto skip; 198 } 199 if (fp->nwords > 0 && &fp->outp[n] >= fp->endbuf && !fp->retain && !q) 200 outline(fp); 201 skip: 202 if (fp->nwords == 0) 203 { 204 if (fp->prefix) 205 memset(fp->outbuf, ' ', fp->prefix); 206 fp->outp = &fp->outbuf[fp->prefix]; 207 while (*cp == ' ') 208 cp++; 209 n = (ep-cp); 210 } 211 memcpy(fp->outp, cp, n); 212 fp->outp += n; 213 fp->nwords++; 214 } 215 if (isoption(fp, 's') || *buf == 0) 216 outline(fp); 217 else if (fp->outp) 218 { 219 /* 220 * two spaces at ends of sentences 221 */ 222 223 if (!isoption(fp, 'o') && strchr(".:!?", fp->outp[-1])) 224 *fp->outp++ = ' '; 225 if (!splice && !fp->retain && (!fp->quote || (fp->outp - fp->outbuf) < 2 || fp->outp[-2] != '\\' || fp->outp[-1] != 'n' && fp->outp[-1] != 't' && fp->outp[-1] != ' ')) 226 *fp->outp++ = ' '; 227 } 228 } 229 230 static int 231 dofmt(Fmt_t* fp) 232 { 233 register int c; 234 int b; 235 int x; 236 int splice; 237 char* cp; 238 char* dp; 239 char* ep; 240 char* lp; 241 char* tp; 242 char buf[8192]; 243 244 cp = 0; 245 while (cp || (cp = sfgetr(fp->in, '\n', 0)) && !(splice = 0) && (lp = cp + sfvalue(fp->in) - 1) || (cp = sfgetr(fp->in, '\n', SF_LASTR)) && (splice = 1) && (lp = cp + sfvalue(fp->in))) 246 { 247 if (isoption(fp, 'o')) 248 { 249 if (!isoption(fp, 'i')) 250 { 251 setoption(fp, 'i'); 252 b = 0; 253 while (cp < lp) 254 { 255 if (*cp == ' ') 256 b += 1; 257 else if (*cp == '\t') 258 b += INDENT; 259 else 260 break; 261 cp++; 262 } 263 fp->indent = roundof(b, INDENT); 264 } 265 else 266 while (cp < lp && (*cp == ' ' || *cp == '\t')) 267 cp++; 268 if (!isoption(fp, 'q') && cp < lp) 269 { 270 setoption(fp, 'q'); 271 if (*cp == '"') 272 { 273 ep = lp; 274 while (--ep > cp) 275 if (*ep == '"') 276 { 277 fp->quote = 1; 278 break; 279 } 280 else if (*ep != ' ' && *ep != '\t') 281 break; 282 } 283 } 284 } 285 again: 286 dp = buf; 287 ep = 0; 288 for (b = 1;; b = 0) 289 { 290 if (cp >= lp) 291 { 292 cp = 0; 293 break; 294 } 295 c = *cp++; 296 if (isoption(fp, 'o')) 297 { 298 if (c == '\\') 299 { 300 x = 0; 301 c = ' '; 302 cp--; 303 while (cp < lp) 304 { 305 if (*cp == '\\') 306 { 307 cp++; 308 if ((lp - cp) < 1) 309 { 310 c = '\\'; 311 break; 312 } 313 if (*cp == 'n') 314 { 315 cp++; 316 c = '\n'; 317 if ((lp - cp) > 2) 318 { 319 if (*cp == ']' || *cp == '@' && *(cp + 1) == '(') 320 { 321 *dp++ = '\\'; 322 *dp++ = 'n'; 323 c = *cp++; 324 break; 325 } 326 if (*cp == '\\' && *(cp + 1) == 'n') 327 { 328 cp += 2; 329 *dp++ = '\n'; 330 break; 331 } 332 } 333 } 334 else if (*cp == 't' || *cp == ' ') 335 { 336 cp++; 337 x = 1; 338 c = ' '; 339 } 340 else 341 { 342 if (x && dp != buf && *(dp - 1) != ' ') 343 *dp++ = ' '; 344 *dp++ = '\\'; 345 c = *cp++; 346 break; 347 } 348 } 349 else if (*cp == ' ' || *cp == '\t') 350 { 351 cp++; 352 c = ' '; 353 x = 1; 354 } 355 else 356 { 357 if (x && c != '\n' && dp != buf && *(dp - 1) != ' ') 358 *dp++ = ' '; 359 break; 360 } 361 } 362 if (c == '\n') 363 { 364 c = 0; 365 goto flush; 366 } 367 if (c == ' ' && (dp == buf || *(dp - 1) == ' ')) 368 continue; 369 } 370 else if (c == '"') 371 { 372 if (b || cp >= lp) 373 { 374 if (fp->quote) 375 continue; 376 fp->section = 0; 377 } 378 } 379 else if (c == '\a') 380 { 381 *dp++ = '\\'; 382 c = 'a'; 383 } 384 else if (c == '\b') 385 { 386 *dp++ = '\\'; 387 c = 'b'; 388 } 389 else if (c == '\f') 390 { 391 *dp++ = '\\'; 392 c = 'f'; 393 } 394 else if (c == '\v') 395 { 396 *dp++ = '\\'; 397 c = 'v'; 398 } 399 else if (c == ']' && (cp >= lp || *cp != ':' && *cp != '#' && *cp != '!')) 400 { 401 if (cp < lp && *cp == ']') 402 { 403 cp++; 404 *dp++ = c; 405 } 406 else 407 { 408 fp->section = 1; 409 fp->retain = 0; 410 flush: 411 *dp++ = c; 412 *dp = 0; 413 split(fp, buf, 0); 414 outline(fp); 415 goto again; 416 } 417 } 418 else if (fp->section) 419 { 420 if (c == '[') 421 { 422 if (b) 423 fp->retain = 1; 424 else 425 { 426 cp--; 427 c = 0; 428 goto flush; 429 } 430 fp->section = 0; 431 } 432 else if (c == '{') 433 { 434 x = 1; 435 for (tp = cp; tp < lp; tp++) 436 { 437 if (*tp == '[' || *tp == '\n') 438 break; 439 if (*tp == ' ' || *tp == '\t' || *tp == '"') 440 continue; 441 if (*tp == '\\' && (lp - tp) > 1) 442 { 443 if (*++tp == 'n') 444 break; 445 if (*tp == 't' || *tp == '\n') 446 continue; 447 } 448 x = 0; 449 break; 450 } 451 if (x) 452 { 453 if (fp->endbuf > (fp->outbuf + fp->indent + 2*INDENT)) 454 fp->nextdent = 2*INDENT; 455 goto flush; 456 } 457 else 458 fp->section = 0; 459 } 460 else if (c == '}') 461 { 462 if (fp->indent && (b || *(cp - 2) != 'f')) 463 { 464 if (b) 465 { 466 fp->indent -= 2*INDENT; 467 fp->endbuf += 2*INDENT; 468 } 469 else 470 { 471 cp--; 472 c = 0; 473 } 474 goto flush; 475 } 476 else 477 fp->section = 0; 478 } 479 else if (c == ' ' || c == '\t') 480 continue; 481 else 482 fp->section = 0; 483 } 484 else if (c == '?' && (cp >= lp || *cp != '?')) 485 { 486 if (fp->retain) 487 { 488 cp--; 489 while (cp < lp && *cp != ' ' && *cp != '\t' && *cp != ']' && dp < &buf[sizeof(buf)-3]) 490 *dp++ = *cp++; 491 if (cp < lp && (*cp == ' ' || *cp == '\t')) 492 *dp++ = *cp++; 493 *dp = 0; 494 split(fp, buf, 0); 495 dp = buf; 496 ep = 0; 497 fp->retain = 0; 498 if (fp->outp >= fp->endbuf) 499 outline(fp); 500 continue; 501 } 502 } 503 else if (c == ' ' || c == '\t') 504 for (c = ' '; *cp == ' ' || *cp == '\t'; cp++); 505 } 506 else if (c == '\b') 507 { 508 if (dp > buf) 509 { 510 dp--; 511 if (ep) 512 ep--; 513 } 514 continue; 515 } 516 else if (c == '\t') 517 { 518 /* 519 * expand tabs 520 */ 521 522 if (!ep) 523 ep = dp; 524 c = isoption(fp, 'o') ? 1 : TABSZ - (dp - buf) % TABSZ; 525 if (dp >= &buf[sizeof(buf) - c - 3]) 526 { 527 cp--; 528 break; 529 } 530 while (c-- > 0) 531 *dp++ = ' '; 532 continue; 533 } 534 else if (!isprint(c)) 535 continue; 536 if (dp >= &buf[sizeof(buf) - 3]) 537 { 538 tp = dp; 539 while (--tp > buf) 540 if (isspace(*tp)) 541 { 542 cp -= dp - tp; 543 dp = tp; 544 break; 545 } 546 ep = 0; 547 break; 548 } 549 if (c != ' ') 550 ep = 0; 551 else if (!ep) 552 ep = dp; 553 *dp++ = c; 554 } 555 if (ep) 556 *ep = 0; 557 else 558 *dp = 0; 559 split(fp, buf, splice); 560 } 561 return 0; 562 } 563 564 int 565 b_fmt(int argc, char** argv, void *context) 566 { 567 register int n; 568 char* cp; 569 Fmt_t fmt; 570 char outbuf[8 * 1024]; 571 572 fmt.flags = 0; 573 fmt.out = sfstdout; 574 fmt.outbuf = outbuf; 575 fmt.outp = 0; 576 fmt.endbuf = &outbuf[72]; 577 fmt.indent = 0; 578 fmt.nextdent = 0; 579 fmt.nwords = 0; 580 fmt.prefix = 0; 581 fmt.quote = 0; 582 fmt.retain = 0; 583 fmt.section = 1; 584 cmdinit(argc, argv, context, ERROR_CATALOG, 0); 585 while (n = optget(argv, usage)) 586 switch (n) 587 { 588 case 'c': 589 case 'o': 590 case 's': 591 case 'u': 592 setoption(&fmt, n); 593 break; 594 case 'w': 595 if (opt_info.num < TABSZ || opt_info.num>= sizeof(outbuf)) 596 error(2, "width out of range"); 597 fmt.endbuf = &outbuf[opt_info.num]; 598 break; 599 case ':': 600 error(2, "%s", opt_info.arg); 601 break; 602 case '?': 603 error(ERROR_usage(2), "%s", opt_info.arg); 604 break; 605 } 606 argv += opt_info.index; 607 if (error_info.errors) 608 error(ERROR_usage(2), "%s", optusage(NiL)); 609 if (isoption(&fmt, 'o')) 610 setoption(&fmt, 'c'); 611 if (isoption(&fmt, 's')) 612 clroption(&fmt, 'u'); 613 if (cp = *argv) 614 argv++; 615 do { 616 if (!cp || streq(cp, "-")) 617 fmt.in = sfstdin; 618 else if (!(fmt.in = sfopen(NiL, cp, "r"))) 619 { 620 error(ERROR_system(0), "%s: cannot open", cp); 621 error_info.errors = 1; 622 continue; 623 } 624 dofmt(&fmt); 625 if (fmt.in != sfstdin) 626 sfclose(fmt.in); 627 } while (cp = *argv++); 628 outline(&fmt); 629 if (sfsync(sfstdout)) 630 error(ERROR_system(0), "write error"); 631 return error_info.errors != 0; 632 } 633