1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <ctype.h> 35 #include <wctype.h> 36 #include <widec.h> 37 #include <dlfcn.h> 38 #include <locale.h> 39 #include <sys/param.h> 40 #include <string.h> 41 42 /* 43 * fmt -- format the concatenation of input files or standard input 44 * onto standard output. Designed for use with Mail ~| 45 * 46 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ] 47 * Author: Kurt Shoens (UCB) 12/7/78 48 */ 49 50 #define NOSTR ((wchar_t *)0) /* Null string pointer for lint */ 51 #define MAXLINES 100 /* maximum mail header lines to verify */ 52 53 wchar_t outbuf[BUFSIZ]; /* Sandbagged output line image */ 54 wchar_t *outp; /* Pointer in above */ 55 int filler; /* Filler amount in outbuf */ 56 57 int pfx; /* Current leading blank count */ 58 int width = 72; /* Width that we will not exceed */ 59 int nojoin = 0; /* split lines only, don't join short ones */ 60 int errs = 0; /* Current number of errors */ 61 62 enum crown_type {c_none, c_reset, c_head, c_lead, c_fixup, c_body}; 63 enum crown_type crown_state; /* Crown margin state */ 64 int crown_head; /* The header offset */ 65 int crown_body; /* The body offset */ 66 /* currently-known initial strings found in mail headers */ 67 wchar_t *headnames[] = { 68 L"Apparently-To", L"Bcc", L"bcc", L"Cc", L"cc", L"Confirmed-By", 69 L"Content", L"content-length", L"From", L"Date", L"id", 70 L"Message-I", L"MIME-Version", L"Precedence", L"Return-Path", 71 L"Received", L"Reply-To", L"Status", L"Subject", L"To", L"X-IMAP", 72 L"X-Lines", L"X-Sender", L"X-Sun", L"X-Status", L"X-UID", 73 0}; 74 75 enum hdr_type { 76 off, /* mail header processing is off */ 77 not_in_hdr, /* not currently processing a mail header */ 78 in_hdr, /* currently filling hdrbuf with potential hdr lines */ 79 flush_hdr, /* flush hdrbuf; not a header, no special processing */ 80 do_hdr /* process hdrbuf as a mail header */ 81 }; 82 /* current state of hdrbuf */ 83 enum hdr_type hdr_state = not_in_hdr; 84 85 wchar_t *hdrbuf[MAXLINES]; /* buffer to hold potential mail header lines */ 86 int h_lines; /* index into lines of hdrbuf */ 87 88 void (*(split))(wchar_t []); 89 extern int scrwidth(wchar_t); 90 extern int ishead(char []); 91 92 93 static void fill_hdrbuf(wchar_t []); 94 static void header_chk(void); 95 static void process_hdrbuf(void); 96 static void leadin(void); 97 static void tabulate(wchar_t []); 98 static void oflush(void); 99 static void pack(wchar_t []); 100 static void msplit(wchar_t []); 101 static void csplit(wchar_t []); 102 static void _wckind_init(void); 103 static void prefix(wchar_t []); 104 static void fmt(FILE *); 105 static int setopt(char *); 106 int _wckind(wchar_t); 107 108 /* 109 * Drive the whole formatter by managing input files. Also, 110 * cause initialization of the output stuff and flush it out 111 * at the end. 112 */ 113 114 int 115 main(int argc, char **argv) 116 { 117 FILE *fi; 118 char sobuf[BUFSIZ]; 119 char *cp; 120 int nofile; 121 char *locale; 122 123 outp = NOSTR; 124 setbuf(stdout, sobuf); 125 setlocale(LC_ALL, ""); 126 locale = setlocale(LC_CTYPE, ""); 127 if (strcmp(locale, "C") == 0) { 128 split = csplit; 129 } else { 130 split = msplit; 131 _wckind_init(); 132 } 133 if (argc < 2) { 134 single: 135 fmt(stdin); 136 oflush(); 137 exit(0); 138 } 139 nofile = 1; 140 while (--argc) { 141 cp = *++argv; 142 if (setopt(cp)) 143 continue; 144 nofile = 0; 145 if ((fi = fopen(cp, "r")) == NULL) { 146 perror(cp); 147 errs++; 148 continue; 149 } 150 fmt(fi); 151 fclose(fi); 152 } 153 if (nofile) 154 goto single; 155 oflush(); 156 fclose(stdout); 157 return (errs); 158 } 159 160 /* 161 * Read up characters from the passed input file, forming lines, 162 * doing ^H processing, expanding tabs, stripping trailing blanks, 163 * and sending each line down for analysis. 164 */ 165 166 static void 167 fmt(FILE *fi) 168 { 169 wchar_t linebuf[BUFSIZ], canonb[BUFSIZ]; 170 wchar_t *cp, *cp2; 171 int col; 172 wchar_t c; 173 char cbuf[BUFSIZ]; /* stores wchar_t string as char string */ 174 175 c = getwc(fi); 176 while (c != EOF) { 177 /* 178 * Collect a line, doing ^H processing. 179 * Leave tabs for now. 180 */ 181 182 cp = linebuf; 183 while (c != L'\n' && c != EOF && cp-linebuf < BUFSIZ-1) { 184 if (c == L'\b') { 185 if (cp > linebuf) 186 cp--; 187 c = getwc(fi); 188 continue; 189 } 190 if (!(iswprint(c)) && c != L'\t') { 191 c = getwc(fi); 192 continue; 193 } 194 *cp++ = c; 195 c = getwc(fi); 196 } 197 *cp = L'\0'; 198 199 /* 200 * Toss anything remaining on the input line. 201 */ 202 203 while (c != L'\n' && c != EOF) 204 c = getwc(fi); 205 /* 206 * Expand tabs on the way to canonb. 207 */ 208 209 col = 0; 210 cp = linebuf; 211 cp2 = canonb; 212 while (c = *cp++) { 213 if (c != L'\t') { 214 col += scrwidth(c); 215 if (cp2-canonb < BUFSIZ-1) 216 *cp2++ = c; 217 continue; 218 } 219 do { 220 if (cp2-canonb < BUFSIZ-1) 221 *cp2++ = L' '; 222 col++; 223 } while ((col & 07) != 0); 224 } 225 226 /* 227 * Swipe trailing blanks from the line. 228 */ 229 230 for (cp2--; cp2 >= canonb && *cp2 == L' '; cp2--); 231 *++cp2 = '\0'; 232 233 /* special processing to look for mail header lines */ 234 switch (hdr_state) { 235 case off: 236 prefix(canonb); 237 case not_in_hdr: 238 /* look for an initial mail header line */ 239 /* skip initial blanks */ 240 for (cp = canonb; *cp == L' '; cp++); 241 /* 242 * Need to convert string from wchar_t to char, 243 * since this is what ishead() expects. Since we 244 * only want to make sure cp points to a "From" line 245 * of the email, we don't have to alloc 246 * BUFSIZ * MB_LEN_MAX to cbuf. 247 */ 248 wcstombs(cbuf, cp, (BUFSIZ - 1)); 249 if (ishead(cbuf)) { 250 hdr_state = in_hdr; 251 fill_hdrbuf(canonb); 252 } else { 253 /* no mail header line; process normally */ 254 prefix(canonb); 255 } 256 break; 257 case in_hdr: 258 /* already saw 1st mail header line; look for more */ 259 if (canonb[0] == L'\0') { 260 /* 261 * blank line means end of mail header; 262 * verify current mail header buffer 263 * then process it accordingly 264 */ 265 header_chk(); 266 process_hdrbuf(); 267 /* now process the current blank line */ 268 prefix(canonb); 269 } else 270 /* 271 * not a blank line--save this line as 272 * a potential mail header line 273 */ 274 fill_hdrbuf(canonb); 275 break; 276 } 277 if (c != EOF) 278 c = getwc(fi); 279 } 280 /* 281 * end of this file--make sure we process the stuff in 282 * hdrbuf before we're finished 283 */ 284 if (hdr_state == in_hdr) { 285 header_chk(); 286 process_hdrbuf(); 287 } 288 } 289 290 /* 291 * Take a line devoid of tabs and other garbage and determine its 292 * blank prefix. If the indent changes, call for a linebreak. 293 * If the input line is blank, echo the blank line on the output. 294 * Finally, if the line minus the prefix is a mail header, try to keep 295 * it on a line by itself. 296 */ 297 298 static void 299 prefix(wchar_t line[]) 300 { 301 wchar_t *cp; 302 int np; 303 int nosplit = 0; /* flag set if line should not be split */ 304 305 if (line[0] == L'\0') { 306 oflush(); 307 putchar('\n'); 308 if (crown_state != c_none) 309 crown_state = c_reset; 310 return; 311 } 312 for (cp = line; *cp == L' '; cp++); 313 np = cp - line; 314 315 /* 316 * The following horrible expression attempts to avoid linebreaks 317 * when the indent changes due to a paragraph. 318 */ 319 320 if (crown_state == c_none && np != pfx && (np > pfx || abs(pfx-np) > 8)) 321 oflush(); 322 /* 323 * if this is a mail header line, don't split it; flush previous 324 * line, if any, so we don't join this line to it 325 */ 326 if (hdr_state == do_hdr) { 327 nosplit = 1; 328 oflush(); 329 } 330 /* flush previous line so we don't join this one to it */ 331 if (nojoin) 332 oflush(); 333 /* nroff-type lines starting with '.' are not split nor joined */ 334 if (!nosplit && (nosplit = (*cp == L'.'))) 335 oflush(); 336 pfx = np; 337 switch (crown_state) { 338 case c_reset: 339 crown_head = pfx; 340 crown_state = c_head; 341 break; 342 case c_lead: 343 crown_body = pfx; 344 crown_state = c_body; 345 break; 346 case c_fixup: 347 crown_body = pfx; 348 crown_state = c_body; 349 if (outp) { 350 wchar_t s[BUFSIZ]; 351 352 *outp = L'\0'; 353 wscpy(s, &outbuf[crown_head]); 354 outp = NOSTR; 355 split(s); 356 } 357 break; 358 } 359 if (nosplit) { 360 /* put whole input line onto outbuf and print it out */ 361 pack(cp); 362 oflush(); 363 } else 364 /* 365 * split puts current line onto outbuf, but splits it 366 * at word boundaries, if it exceeds desired length 367 */ 368 split(cp); 369 if (nojoin) 370 /* 371 * flush current line so next lines, if any, 372 * won't join to this one 373 */ 374 oflush(); 375 } 376 377 /* 378 * Split up the passed line into output "words" which are 379 * maximal strings of non-blanks with the blank separation 380 * attached at the end. Pass these words along to the output 381 * line packer. 382 */ 383 384 static void 385 csplit(wchar_t line[]) 386 { 387 wchar_t *cp, *cp2; 388 wchar_t word[BUFSIZ]; 389 static const wchar_t *srchlist = (const wchar_t *) L".:!?"; 390 391 cp = line; 392 while (*cp) { 393 cp2 = word; 394 395 /* 396 * Collect a 'word,' allowing it to contain escaped 397 * white space. 398 */ 399 400 while (*cp && !(iswspace(*cp))) { 401 if (*cp == '\\' && iswspace(cp[1])) 402 *cp2++ = *cp++; 403 *cp2++ = *cp++; 404 } 405 406 /* 407 * Guarantee a space at end of line. 408 * Two spaces after end of sentence punctuation. 409 */ 410 411 if (*cp == L'\0') { 412 *cp2++ = L' '; 413 if (wschr(srchlist, cp[-1]) != NULL) 414 *cp2++ = L' '; 415 } 416 while (iswspace(*cp)) 417 *cp2++ = *cp++; 418 *cp2 = L'\0'; 419 pack(word); 420 } 421 } 422 423 static void 424 msplit(wchar_t line[]) 425 { 426 wchar_t *cp, *cp2, prev; 427 wchar_t word[BUFSIZ]; 428 static const wchar_t *srchlist = (const wchar_t *) L".:!?"; 429 430 cp = line; 431 while (*cp) { 432 cp2 = word; 433 prev = *cp; 434 435 /* 436 * Collect a 'word,' allowing it to contain escaped 437 * white space. 438 */ 439 440 while (*cp) { 441 if (iswspace(*cp)) 442 break; 443 if (_wckind(*cp) != _wckind(prev)) 444 if (wcsetno(*cp) != 0 || wcsetno(prev) != 0) 445 break; 446 if (*cp == '\\' && iswspace(cp[1])) 447 *cp2++ = *cp++; 448 prev = *cp; 449 *cp2++ = *cp++; 450 } 451 452 /* 453 * Guarantee a space at end of line. 454 * Two spaces after end of sentence punctuation. 455 */ 456 457 if (*cp == L'\0') { 458 *cp2++ = L' '; 459 if (wschr(srchlist, cp[-1]) != NULL) 460 *cp2++ = L' '; 461 } 462 while (iswspace(*cp)) 463 *cp2++ = *cp++; 464 *cp2 = L'\0'; 465 pack(word); 466 } 467 } 468 469 /* 470 * Output section. 471 * Build up line images from the words passed in. Prefix 472 * each line with correct number of blanks. The buffer "outbuf" 473 * contains the current partial line image, including prefixed blanks. 474 * "outp" points to the next available space therein. When outp is NOSTR, 475 * there ain't nothing in there yet. At the bottom of this whole mess, 476 * leading tabs are reinserted. 477 */ 478 479 /* 480 * Pack a word onto the output line. If this is the beginning of 481 * the line, push on the appropriately-sized string of blanks first. 482 * If the word won't fit on the current line, flush and begin a new 483 * line. If the word is too long to fit all by itself on a line, 484 * just give it its own and hope for the best. 485 */ 486 487 static void 488 pack(wchar_t word[]) 489 { 490 wchar_t *cp; 491 int s, t; 492 493 if (outp == NOSTR) 494 leadin(); 495 t = wscol(word); 496 *outp = L'\0'; 497 s = wscol(outbuf); 498 if (t+s <= width) { 499 for (cp = word; *cp; *outp++ = *cp++); 500 return; 501 } 502 if (s > filler) { 503 oflush(); 504 leadin(); 505 } 506 for (cp = word; *cp; *outp++ = *cp++); 507 } 508 509 /* 510 * If there is anything on the current output line, send it on 511 * its way. Set outp to NOSTR to indicate the absence of the current 512 * line prefix. 513 */ 514 515 static void 516 oflush(void) 517 { 518 if (outp == NOSTR) 519 return; 520 *outp = L'\0'; 521 tabulate(outbuf); 522 outp = NOSTR; 523 } 524 525 /* 526 * Take the passed line buffer, insert leading tabs where possible, and 527 * output on standard output (finally). 528 */ 529 530 static void 531 tabulate(wchar_t line[]) 532 { 533 wchar_t *cp; 534 int b, t; 535 536 537 /* Toss trailing blanks in the output line */ 538 cp = line + wslen(line) - 1; 539 while (cp >= line && *cp == L' ') 540 cp--; 541 *++cp = L'\0'; 542 /* Count the leading blank space and tabulate */ 543 for (cp = line; *cp == L' '; cp++); 544 b = cp - line; 545 t = b >> 3; 546 b &= 07; 547 if (t > 0) 548 do 549 putc('\t', stdout); 550 while (--t); 551 if (b > 0) 552 do 553 putc(' ', stdout); 554 while (--b); 555 while (*cp) 556 putwc(*cp++, stdout); 557 putc('\n', stdout); 558 } 559 560 /* 561 * Initialize the output line with the appropriate number of 562 * leading blanks. 563 */ 564 565 static void 566 leadin(void) 567 { 568 int b; 569 wchar_t *cp; 570 int l; 571 572 switch (crown_state) { 573 case c_head: 574 l = crown_head; 575 crown_state = c_lead; 576 break; 577 578 case c_lead: 579 case c_fixup: 580 l = crown_head; 581 crown_state = c_fixup; 582 break; 583 584 case c_body: 585 l = crown_body; 586 break; 587 588 default: 589 l = pfx; 590 break; 591 } 592 filler = l; 593 for (b = 0, cp = outbuf; b < l; b++) 594 *cp++ = L' '; 595 outp = cp; 596 } 597 598 /* 599 * Is s1 a prefix of s2?? 600 */ 601 602 static int 603 ispref(wchar_t *s1, wchar_t *s2) 604 { 605 606 while (*s1 != L'\0' && *s2 != L'\0') 607 if (*s1++ != *s2++) 608 return (0); 609 return (1); 610 } 611 612 /* 613 * Set an input option 614 */ 615 616 static int 617 setopt(char *cp) 618 { 619 static int ws = 0; 620 621 if (*cp == '-') { 622 if (cp[1] == 'c' && cp[2] == '\0') { 623 crown_state = c_reset; 624 return (1); 625 } 626 if (cp[1] == 's' && cp[2] == '\0') { 627 nojoin = 1; 628 return (1); 629 } 630 if (cp[1] == 'w' && cp[2] == '\0') { 631 ws++; 632 return (1); 633 } 634 width = atoi(cp+1); 635 } else if (ws) { 636 width = atoi(cp); 637 ws = 0; 638 } else 639 return (0); 640 if (width <= 0 || width >= BUFSIZ-2) { 641 fprintf(stderr, "fmt: bad width: %d\n", width); 642 exit(1); 643 } 644 return (1); 645 } 646 647 648 #define LIB_WDRESOLVE "/usr/lib/locale/%s/LC_CTYPE/wdresolve.so" 649 #define WCHKIND "_wdchkind_" 650 651 static int _wckind_c_locale(wchar_t); 652 653 static int (*__wckind)(wchar_t) = _wckind_c_locale; 654 static void *dlhandle = NULL; 655 656 657 static void 658 _wckind_init(void) 659 { 660 char *locale; 661 char path[MAXPATHLEN + 1]; 662 663 664 if (dlhandle != NULL) { 665 (void) dlclose(dlhandle); 666 dlhandle = NULL; 667 } 668 669 locale = setlocale(LC_CTYPE, NULL); 670 if (strcmp(locale, "C") == 0) 671 goto c_locale; 672 673 (void) sprintf(path, LIB_WDRESOLVE, locale); 674 675 if ((dlhandle = dlopen(path, RTLD_LAZY)) != NULL) { 676 __wckind = (int (*)(wchar_t))dlsym(dlhandle, WCHKIND); 677 if (__wckind != NULL) 678 return; 679 (void) dlclose(dlhandle); 680 dlhandle = NULL; 681 } 682 683 c_locale: 684 __wckind = _wckind_c_locale; 685 } 686 687 688 int 689 _wckind(wchar_t wc) 690 { 691 return (*__wckind) (wc); 692 } 693 694 695 static int 696 _wckind_c_locale(wchar_t wc) 697 { 698 int ret; 699 700 /* 701 * DEPEND_ON_ANSIC: L notion for the character is new in 702 * ANSI-C, k&r compiler won't work. 703 */ 704 if (iswascii(wc)) 705 ret = (iswalnum(wc) || wc == L'_') ? 0 : 1; 706 else 707 ret = wcsetno(wc) + 1; 708 709 return (ret); 710 } 711 712 /* 713 * header_chk - 714 * Called when done looking for a set mail header lines. 715 * Either a blank line was seen, or EOF was reached. 716 * 717 * Verifies if current hdrbuf of potential mail header lines 718 * is really a mail header. A mail header must be at least 2 719 * lines and more than half of them must start with one of the 720 * known mail header strings in headnames. 721 * 722 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid 723 * mail header. Otherwise, it sets hdr_state to flush_hdr. 724 * 725 * h_lines = hdrbuf index for next line to be saved; 726 * also indicates current # of lines in potential header 727 */ 728 static void 729 header_chk(void) 730 { 731 wchar_t *cp; /* ptr to current char of line */ 732 wchar_t **hp; /* ptr to current char of a valid */ 733 /* mail header string */ 734 int l; /* index */ 735 /* 736 * number of lines in hdrbuf that look 737 * like mail header lines (start with 738 * a known mail header prefix) 739 */ 740 int hdrcount = 0; 741 /* header must have at least 2 lines (h_lines > 1) */ 742 if (h_lines < 2) { 743 hdr_state = flush_hdr; 744 return; 745 } 746 /* 747 * go through each line in hdrbuf and see how many 748 * look like mail header lines 749 */ 750 for (l = 0; l < h_lines; l++) { 751 /* skip initial blanks */ 752 for (cp = hdrbuf[l]; *cp == L' '; cp++); 753 for (hp = &headnames[0]; *hp != (wchar_t *)0; hp++) 754 if (ispref(*hp, cp)) { 755 hdrcount++; 756 break; 757 } 758 } 759 /* 760 * if over half match, we'll assume this is a header; 761 * set hdr_state to indicate whether to treat 762 * these lines as mail header (do_hdr) or not (flush_hdr) 763 */ 764 if (hdrcount > h_lines / 2) 765 hdr_state = do_hdr; 766 else 767 hdr_state = flush_hdr; 768 } 769 770 /* 771 * fill_hdrbuf - 772 * Save given input line into next element of hdrbuf, 773 * as a potential mail header line, to be processed later 774 * once we decide whether or not the contents of hdrbuf is 775 * really a mail header, via header_chk(). 776 * 777 * Does not allow hdrbuf to exceed MAXLINES lines. 778 * Dynamically allocates space for each line. If we are unable 779 * to allocate space for the current string, stop special mail 780 * header preservation at this point and continue formatting 781 * without it. 782 */ 783 static void 784 fill_hdrbuf(wchar_t line[]) 785 { 786 wchar_t *cp; /* pointer to characters in input line */ 787 int i; /* index into characters a hdrbuf line */ 788 789 if (h_lines >= MAXLINES) { 790 /* 791 * if we run over MAXLINES potential mail header 792 * lines, stop checking--this is most likely NOT a 793 * mail header; flush out the hdrbuf, then process 794 * the current 'line' normally. 795 */ 796 hdr_state = flush_hdr; 797 process_hdrbuf(); 798 prefix(line); 799 return; 800 } 801 hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) * 802 (wslen(line) + 1)); 803 if (hdrbuf[h_lines] == NULL) { 804 perror("malloc"); 805 fprintf(stderr, "fmt: unable to do mail header preservation\n"); 806 errs++; 807 /* 808 * Can't process mail header; flush current contents 809 * of mail header and continue with no more mail 810 * header processing 811 */ 812 if (h_lines == 0) 813 /* hdrbuf is empty; process this line normally */ 814 prefix(line); 815 else { 816 hdr_state = flush_hdr; 817 for (i = 0; i < h_lines; i++) { 818 prefix(hdrbuf[i]); 819 free(hdrbuf[i]); 820 } 821 h_lines = 0; 822 } 823 hdr_state = off; 824 return; 825 } 826 /* save this line as a potential mail header line */ 827 for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++); 828 h_lines++; 829 } 830 831 /* 832 * process_hdrbuf - 833 * Outputs the lines currently stored in hdrbuf, according 834 * to the current hdr_state value, assumed to be either do_hdr 835 * or flush_hdr. 836 * This should be called after doing a header_chk() to verify 837 * the hdrbuf and set the hdr_state flag. 838 */ 839 static void 840 process_hdrbuf(void) 841 { 842 int i; 843 844 for (i = 0; i < h_lines; i++) { 845 prefix(hdrbuf[i]); 846 free(hdrbuf[i]); 847 } 848 hdr_state = not_in_hdr; 849 h_lines = 0; 850 } 851