1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 1997 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <ctype.h> 35 #include <wctype.h> 36 #include <widec.h> 37 #include <dlfcn.h> 38 #include <locale.h> 39 #include <sys/param.h> 40 #include <string.h> 41 42 /* 43 * fmt -- format the concatenation of input files or standard input 44 * onto standard output. Designed for use with Mail ~| 45 * 46 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ] 47 * Author: Kurt Shoens (UCB) 12/7/78 48 */ 49 50 #define NOSTR ((wchar_t *) 0) /* Null string pointer for lint */ 51 #define MAXLINES 100 /* maximum mail header lines to verify */ 52 53 wchar_t outbuf[BUFSIZ]; /* Sandbagged output line image */ 54 wchar_t *outp; /* Pointer in above */ 55 int filler; /* Filler amount in outbuf */ 56 57 int pfx; /* Current leading blank count */ 58 int width = 72; /* Width that we will not exceed */ 59 int nojoin = 0; /* split lines only, don't join short ones */ 60 int errs = 0; /* Current number of errors */ 61 62 enum crown_type {c_none, c_reset, c_head, c_lead, c_fixup, c_body}; 63 enum crown_type crown_state; /* Crown margin state */ 64 int crown_head; /* The header offset */ 65 int crown_body; /* The body offset */ 66 /* currently-known initial strings found in mail headers */ 67 wchar_t *headnames[] = { 68 L"Apparently-To", L"Bcc", L"bcc", L"Cc", L"cc", L"Confirmed-By", 69 L"Content", L"content-length", L"From", L"Date", L"id", 70 L"Message-I", L"MIME-Version", L"Precedence", L"Return-Path", 71 L"Received", L"Reply-To", L"Status", L"Subject", L"To", L"X-IMAP", 72 L"X-Lines", L"X-Sender", L"X-Sun", L"X-Status", L"X-UID", 73 0}; 74 75 enum hdr_type { 76 off, /* mail header processing is off */ 77 not_in_hdr, /* not currently processing a mail header */ 78 in_hdr, /* currently filling hdrbuf with potential hdr lines */ 79 flush_hdr, /* flush hdrbuf; not a header, no special processing */ 80 do_hdr /* process hdrbuf as a mail header */ 81 }; 82 /* current state of hdrbuf */ 83 enum hdr_type hdr_state = not_in_hdr; 84 85 wchar_t *hdrbuf[MAXLINES]; /* buffer to hold potential mail header lines */ 86 int h_lines; /* index into lines of hdrbuf */ 87 88 int (*(split))(); 89 extern int scrwidth(wchar_t); 90 91 static void fill_hdrbuf(wchar_t line[]); 92 static void header_chk(void); 93 static void process_hdrbuf(void); 94 95 /* 96 * Drive the whole formatter by managing input files. Also, 97 * cause initialization of the output stuff and flush it out 98 * at the end. 99 */ 100 101 main(int argc, char **argv) 102 { 103 register FILE *fi; 104 char sobuf[BUFSIZ]; 105 register char *cp; 106 int nofile; 107 char *locale; 108 int csplit(), msplit(); 109 void _wckind_init(); 110 111 outp = NOSTR; 112 setbuf(stdout, sobuf); 113 setlocale(LC_ALL, ""); 114 locale = setlocale(LC_CTYPE, ""); 115 if (strcmp(locale, "C") == 0) { 116 split = csplit; 117 } else { 118 split = msplit; 119 (void) _wckind_init(); 120 } 121 if (argc < 2) { 122 single: 123 fmt(stdin); 124 oflush(); 125 exit(0); 126 } 127 nofile = 1; 128 while (--argc) { 129 cp = *++argv; 130 if (setopt(cp)) 131 continue; 132 nofile = 0; 133 if ((fi = fopen(cp, "r")) == NULL) { 134 perror(cp); 135 errs++; 136 continue; 137 } 138 fmt(fi); 139 fclose(fi); 140 } 141 if (nofile) 142 goto single; 143 oflush(); 144 exit(errs); 145 /* NOTREACHED */ 146 } 147 148 /* 149 * Read up characters from the passed input file, forming lines, 150 * doing ^H processing, expanding tabs, stripping trailing blanks, 151 * and sending each line down for analysis. 152 */ 153 154 fmt(FILE *fi) 155 { 156 wchar_t linebuf[BUFSIZ], canonb[BUFSIZ]; 157 register wchar_t *cp, *cp2; 158 register int col; 159 wchar_t c; 160 char cbuf[BUFSIZ]; /* stores wchar_t string as char string */ 161 162 c = getwc(fi); 163 while (c != EOF) { 164 /* 165 * Collect a line, doing ^H processing. 166 * Leave tabs for now. 167 */ 168 169 cp = linebuf; 170 while (c != L'\n' && c != EOF && cp-linebuf < BUFSIZ-1) { 171 if (c == L'\b') { 172 if (cp > linebuf) 173 cp--; 174 c = getwc(fi); 175 continue; 176 } 177 if (!(iswprint(c)) && c != L'\t') { 178 c = getwc(fi); 179 continue; 180 } 181 *cp++ = c; 182 c = getwc(fi); 183 } 184 *cp = L'\0'; 185 186 /* 187 * Toss anything remaining on the input line. 188 */ 189 190 while (c != L'\n' && c != EOF) 191 c = getwc(fi); 192 /* 193 * Expand tabs on the way to canonb. 194 */ 195 196 col = 0; 197 cp = linebuf; 198 cp2 = canonb; 199 while (c = *cp++) { 200 if (c != L'\t') { 201 col += scrwidth(c); 202 if (cp2-canonb < BUFSIZ-1) 203 *cp2++ = c; 204 continue; 205 } 206 do { 207 if (cp2-canonb < BUFSIZ-1) 208 *cp2++ = L' '; 209 col++; 210 } while ((col & 07) != 0); 211 } 212 213 /* 214 * Swipe trailing blanks from the line. 215 */ 216 217 for (cp2--; cp2 >= canonb && *cp2 == L' '; cp2--); 218 *++cp2 = '\0'; 219 220 /* special processing to look for mail header lines */ 221 switch (hdr_state) { 222 case off: 223 prefix(canonb); 224 case not_in_hdr: 225 /* look for an initial mail header line */ 226 /* skip initial blanks */ 227 for (cp = canonb; *cp == L' '; cp++); 228 /* 229 * Need to convert string from wchar_t to char, 230 * since this is what ishead() expects. Since we 231 * only want to make sure cp points to a "From" line 232 * of the email, we don't have to alloc 233 * BUFSIZ * MB_LEN_MAX to cbuf. 234 */ 235 wcstombs(cbuf, cp, (BUFSIZ - 1)); 236 if (ishead(cbuf)) { 237 hdr_state = in_hdr; 238 fill_hdrbuf(canonb); 239 } else { 240 /* no mail header line; process normally */ 241 prefix(canonb); 242 } 243 break; 244 case in_hdr: 245 /* already saw 1st mail header line; look for more */ 246 if (canonb[0] == L'\0') { 247 /* 248 * blank line means end of mail header; 249 * verify current mail header buffer 250 * then process it accordingly 251 */ 252 header_chk(); 253 process_hdrbuf(); 254 /* now process the current blank line */ 255 prefix(canonb); 256 } else 257 /* 258 * not a blank line--save this line as 259 * a potential mail header line 260 */ 261 fill_hdrbuf(canonb); 262 break; 263 } 264 if (c != EOF) 265 c = getwc(fi); 266 } 267 /* 268 * end of this file--make sure we process the stuff in 269 * hdrbuf before we're finished 270 */ 271 if (hdr_state == in_hdr) { 272 header_chk(); 273 process_hdrbuf(); 274 } 275 } 276 277 /* 278 * Take a line devoid of tabs and other garbage and determine its 279 * blank prefix. If the indent changes, call for a linebreak. 280 * If the input line is blank, echo the blank line on the output. 281 * Finally, if the line minus the prefix is a mail header, try to keep 282 * it on a line by itself. 283 */ 284 285 prefix(wchar_t line[]) 286 { 287 register wchar_t *cp; 288 register int np; 289 register int i; 290 int nosplit = 0; /* flag set if line should not be split */ 291 292 if (line[0] == L'\0') { 293 oflush(); 294 putchar('\n'); 295 if (crown_state != c_none) 296 crown_state = c_reset; 297 return; 298 } 299 for (cp = line; *cp == L' '; cp++); 300 np = cp - line; 301 302 /* 303 * The following horrible expression attempts to avoid linebreaks 304 * when the indent changes due to a paragraph. 305 */ 306 307 if (crown_state == c_none && np != pfx && (np > pfx || abs(pfx-np) > 8)) 308 oflush(); 309 /* 310 * if this is a mail header line, don't split it; flush previous 311 * line, if any, so we don't join this line to it 312 */ 313 if (hdr_state == do_hdr) { 314 nosplit = 1; 315 oflush(); 316 } 317 /* flush previous line so we don't join this one to it */ 318 if (nojoin) 319 oflush(); 320 /* nroff-type lines starting with '.' are not split nor joined */ 321 if (!nosplit && (nosplit = (*cp == L'.'))) 322 oflush(); 323 pfx = np; 324 switch (crown_state) { 325 case c_reset: 326 crown_head = pfx; 327 crown_state = c_head; 328 break; 329 case c_lead: 330 crown_body = pfx; 331 crown_state = c_body; 332 break; 333 case c_fixup: 334 crown_body = pfx; 335 crown_state = c_body; 336 if (outp) { 337 wchar_t s[BUFSIZ]; 338 339 *outp = L'\0'; 340 wscpy(s, &outbuf[crown_head]); 341 outp = NOSTR; 342 split(s); 343 } 344 break; 345 } 346 if (nosplit) { 347 /* put whole input line onto outbuf and print it out */ 348 pack(cp); 349 oflush(); 350 } else 351 /* 352 * split puts current line onto outbuf, but splits it 353 * at word boundaries, if it exceeds desired length 354 */ 355 split(cp); 356 if (nojoin) 357 /* 358 * flush current line so next lines, if any, 359 * won't join to this one 360 */ 361 oflush(); 362 } 363 364 /* 365 * Split up the passed line into output "words" which are 366 * maximal strings of non-blanks with the blank separation 367 * attached at the end. Pass these words along to the output 368 * line packer. 369 */ 370 371 csplit(wchar_t line[]) 372 { 373 register wchar_t *cp, *cp2; 374 wchar_t word[BUFSIZ]; 375 static const wchar_t *srchlist = (const wchar_t *) L".:!?"; 376 377 cp = line; 378 while (*cp) { 379 cp2 = word; 380 381 /* 382 * Collect a 'word,' allowing it to contain escaped 383 * white space. 384 */ 385 386 while (*cp && !(iswspace(*cp))) { 387 if (*cp == '\\' && iswspace(cp[1])) 388 *cp2++ = *cp++; 389 *cp2++ = *cp++; 390 } 391 392 /* 393 * Guarantee a space at end of line. 394 * Two spaces after end of sentence punctuation. 395 */ 396 397 if (*cp == L'\0') { 398 *cp2++ = L' '; 399 if (wschr(srchlist, cp[-1]) != NULL) 400 *cp2++ = L' '; 401 } 402 while (iswspace(*cp)) 403 *cp2++ = *cp++; 404 *cp2 = L'\0'; 405 pack(word); 406 } 407 } 408 409 msplit(wchar_t line[]) 410 { 411 register wchar_t *cp, *cp2, prev; 412 wchar_t word[BUFSIZ]; 413 static const wchar_t *srchlist = (const wchar_t *) L".:!?"; 414 415 cp = line; 416 while (*cp) { 417 cp2 = word; 418 prev = *cp; 419 420 /* 421 * Collect a 'word,' allowing it to contain escaped 422 * white space. 423 */ 424 425 while (*cp) { 426 if (iswspace(*cp)) 427 break; 428 if (_wckind(*cp) != _wckind(prev)) 429 if (wcsetno(*cp) != 0 || wcsetno(prev) != 0) 430 break; 431 if (*cp == '\\' && iswspace(cp[1])) 432 *cp2++ = *cp++; 433 prev = *cp; 434 *cp2++ = *cp++; 435 } 436 437 /* 438 * Guarantee a space at end of line. 439 * Two spaces after end of sentence punctuation. 440 */ 441 442 if (*cp == L'\0') { 443 *cp2++ = L' '; 444 if (wschr(srchlist, cp[-1]) != NULL) 445 *cp2++ = L' '; 446 } 447 while (iswspace(*cp)) 448 *cp2++ = *cp++; 449 *cp2 = L'\0'; 450 pack(word); 451 } 452 } 453 454 /* 455 * Output section. 456 * Build up line images from the words passed in. Prefix 457 * each line with correct number of blanks. The buffer "outbuf" 458 * contains the current partial line image, including prefixed blanks. 459 * "outp" points to the next available space therein. When outp is NOSTR, 460 * there ain't nothing in there yet. At the bottom of this whole mess, 461 * leading tabs are reinserted. 462 */ 463 464 /* 465 * Pack a word onto the output line. If this is the beginning of 466 * the line, push on the appropriately-sized string of blanks first. 467 * If the word won't fit on the current line, flush and begin a new 468 * line. If the word is too long to fit all by itself on a line, 469 * just give it its own and hope for the best. 470 */ 471 472 pack(wchar_t word[]) 473 { 474 register wchar_t *cp; 475 register int s, t; 476 477 if (outp == NOSTR) 478 leadin(); 479 t = wscol(word); 480 *outp = L'\0'; 481 s = wscol(outbuf); 482 if (t+s <= width) { 483 for (cp = word; *cp; *outp++ = *cp++); 484 return; 485 } 486 if (s > filler) { 487 oflush(); 488 leadin(); 489 } 490 for (cp = word; *cp; *outp++ = *cp++); 491 } 492 493 /* 494 * If there is anything on the current output line, send it on 495 * its way. Set outp to NOSTR to indicate the absence of the current 496 * line prefix. 497 */ 498 499 oflush(void) 500 { 501 if (outp == NOSTR) 502 return; 503 *outp = L'\0'; 504 tabulate(outbuf); 505 outp = NOSTR; 506 } 507 508 /* 509 * Take the passed line buffer, insert leading tabs where possible, and 510 * output on standard output (finally). 511 */ 512 513 tabulate(wchar_t line[]) 514 { 515 register wchar_t *cp, *cp2; 516 register int b, t; 517 518 519 /* Toss trailing blanks in the output line */ 520 cp = line + wslen(line) - 1; 521 while (cp >= line && *cp == L' ') 522 cp--; 523 *++cp = L'\0'; 524 /* Count the leading blank space and tabulate */ 525 for (cp = line; *cp == L' '; cp++); 526 b = cp - line; 527 t = b >> 3; 528 b &= 07; 529 if (t > 0) 530 do 531 putc('\t', stdout); 532 while (--t); 533 if (b > 0) 534 do 535 putc(' ', stdout); 536 while (--b); 537 while (*cp) 538 putwc(*cp++, stdout); 539 putc('\n', stdout); 540 } 541 542 /* 543 * Initialize the output line with the appropriate number of 544 * leading blanks. 545 */ 546 547 leadin() 548 { 549 register int b; 550 register wchar_t *cp; 551 register int l; 552 553 switch (crown_state) { 554 case c_head: 555 l = crown_head; 556 crown_state = c_lead; 557 break; 558 559 case c_lead: 560 case c_fixup: 561 l = crown_head; 562 crown_state = c_fixup; 563 break; 564 565 case c_body: 566 l = crown_body; 567 break; 568 569 default: 570 l = pfx; 571 break; 572 } 573 filler = l; 574 for (b = 0, cp = outbuf; b < l; b++) 575 *cp++ = L' '; 576 outp = cp; 577 } 578 579 /* 580 * Is s1 a prefix of s2?? 581 */ 582 583 ispref(wchar_t *s1, wchar_t *s2) 584 { 585 586 while (*s1 != L'\0' && *s2 != L'\0') 587 if (*s1++ != *s2++) 588 return (0); 589 return (1); 590 } 591 592 /* 593 * Set an input option 594 */ 595 596 setopt(cp) 597 register char *cp; 598 { 599 static int ws = 0; 600 601 if (*cp == '-') { 602 if (cp[1] == 'c' && cp[2] == '\0') { 603 crown_state = c_reset; 604 return (1); 605 } 606 if (cp[1] == 's' && cp[2] == '\0') { 607 nojoin = 1; 608 return (1); 609 } 610 if (cp[1] == 'w' && cp[2] == '\0') { 611 ws++; 612 return (1); 613 } 614 width = atoi(cp+1); 615 } else if (ws) { 616 width = atoi(cp); 617 ws = 0; 618 } else 619 return (0); 620 if (width <= 0 || width >= BUFSIZ-2) { 621 fprintf(stderr, "fmt: bad width: %d\n", width); 622 exit(1); 623 } 624 return (1); 625 } 626 627 628 #define LIB_WDRESOLVE "/usr/lib/locale/%s/LC_CTYPE/wdresolve.so" 629 #define WCHKIND "_wdchkind_" 630 631 static int _wckind_c_locale(); 632 633 static int (*__wckind)() = _wckind_c_locale; 634 static void *dlhandle = NULL; 635 636 637 void 638 _wckind_init() 639 { 640 char *locale; 641 char path[MAXPATHLEN + 1]; 642 643 644 if (dlhandle != NULL) { 645 (void) dlclose(dlhandle); 646 dlhandle = NULL; 647 } 648 649 locale = setlocale(LC_CTYPE, NULL); 650 if (strcmp(locale, "C") == 0) 651 goto c_locale; 652 653 (void) sprintf(path, LIB_WDRESOLVE, locale); 654 655 if ((dlhandle = dlopen(path, RTLD_LAZY)) != NULL) { 656 __wckind = (int (*)(int))dlsym(dlhandle, WCHKIND); 657 if (__wckind != NULL) 658 return; 659 (void) dlclose(dlhandle); 660 dlhandle = NULL; 661 } 662 663 c_locale: 664 __wckind = _wckind_c_locale; 665 } 666 667 668 int 669 _wckind(wc) 670 wchar_t wc; 671 { 672 return (*__wckind) (wc); 673 } 674 675 676 static int 677 _wckind_c_locale(wc) 678 wchar_t wc; 679 { 680 int ret; 681 682 /* 683 * DEPEND_ON_ANSIC: L notion for the character is new in 684 * ANSI-C, k&r compiler won't work. 685 */ 686 if (iswascii(wc)) 687 ret = (iswalnum(wc) || wc == L'_') ? 0 : 1; 688 else 689 ret = wcsetno(wc) + 1; 690 691 return (ret); 692 } 693 694 /* 695 * header_chk - 696 * Called when done looking for a set mail header lines. 697 * Either a blank line was seen, or EOF was reached. 698 * 699 * Verifies if current hdrbuf of potential mail header lines 700 * is really a mail header. A mail header must be at least 2 701 * lines and more than half of them must start with one of the 702 * known mail header strings in headnames. 703 * 704 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid 705 * mail header. Otherwise, it sets hdr_state to flush_hdr. 706 * 707 * h_lines = hdrbuf index for next line to be saved; 708 * also indicates current # of lines in potential header 709 */ 710 static void 711 header_chk(void) 712 { 713 wchar_t *cp; /* ptr to current char of line */ 714 wchar_t **hp; /* ptr to current char of a valid */ 715 /* mail header string */ 716 int l; /* index */ 717 /* 718 * number of lines in hdrbuf that look 719 * like mail header lines (start with 720 * a known mail header prefix) 721 */ 722 int hdrcount = 0; 723 /* header must have at least 2 lines (h_lines > 1) */ 724 if (h_lines < 2) { 725 hdr_state = flush_hdr; 726 return; 727 } 728 /* 729 * go through each line in hdrbuf and see how many 730 * look like mail header lines 731 */ 732 for (l = 0; l < h_lines; l++) { 733 /* skip initial blanks */ 734 for (cp = hdrbuf[l]; *cp == L' '; cp++); 735 for (hp = &headnames[0]; *hp != (wchar_t *) 0; hp++) 736 if (ispref(*hp, cp)) { 737 hdrcount++; 738 break; 739 } 740 } 741 /* 742 * if over half match, we'll assume this is a header; 743 * set hdr_state to indicate whether to treat 744 * these lines as mail header (do_hdr) or not (flush_hdr) 745 */ 746 if (hdrcount > h_lines / 2) 747 hdr_state = do_hdr; 748 else 749 hdr_state = flush_hdr; 750 } 751 752 /* 753 * fill_hdrbuf - 754 * Save given input line into next element of hdrbuf, 755 * as a potential mail header line, to be processed later 756 * once we decide whether or not the contents of hdrbuf is 757 * really a mail header, via header_chk(). 758 * 759 * Does not allow hdrbuf to exceed MAXLINES lines. 760 * Dynamically allocates space for each line. If we are unable 761 * to allocate space for the current string, stop special mail 762 * header preservation at this point and continue formatting 763 * without it. 764 */ 765 static void 766 fill_hdrbuf(wchar_t line[]) 767 { 768 wchar_t *cp; /* pointer to characters in input line */ 769 int i; /* index into characters a hdrbuf line */ 770 771 if (h_lines >= MAXLINES) { 772 /* 773 * if we run over MAXLINES potential mail header 774 * lines, stop checking--this is most likely NOT a 775 * mail header; flush out the hdrbuf, then process 776 * the current 'line' normally. 777 */ 778 hdr_state = flush_hdr; 779 process_hdrbuf(); 780 prefix(line); 781 return; 782 } 783 hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) * 784 (wslen(line) + 1)); 785 if (hdrbuf[h_lines] == NULL) { 786 perror("malloc"); 787 fprintf(stderr, "fmt: unable to do mail header preservation\n"); 788 errs++; 789 /* 790 * Can't process mail header; flush current contents 791 * of mail header and continue with no more mail 792 * header processing 793 */ 794 if (h_lines == 0) 795 /* hdrbuf is empty; process this line normally */ 796 prefix(line); 797 else { 798 hdr_state = flush_hdr; 799 for (i = 0; i < h_lines; i++) { 800 prefix(hdrbuf[i]); 801 free(hdrbuf[i]); 802 } 803 h_lines = 0; 804 } 805 hdr_state = off; 806 return; 807 } 808 /* save this line as a potential mail header line */ 809 for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++); 810 h_lines++; 811 } 812 813 /* 814 * process_hdrbuf - 815 * Outputs the lines currently stored in hdrbuf, according 816 * to the current hdr_state value, assumed to be either do_hdr 817 * or flush_hdr. 818 * This should be called after doing a header_chk() to verify 819 * the hdrbuf and set the hdr_state flag. 820 */ 821 static void 822 process_hdrbuf(void) 823 { 824 int i; 825 826 for (i = 0; i < h_lines; i++) { 827 prefix(hdrbuf[i]); 828 free(hdrbuf[i]); 829 } 830 hdr_state = not_in_hdr; 831 h_lines = 0; 832 } 833