1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <ctype.h> 35 #include <wctype.h> 36 #include <widec.h> 37 #include <dlfcn.h> 38 #include <locale.h> 39 #include <sys/param.h> 40 #include <string.h> 41 42 /* 43 * fmt -- format the concatenation of input files or standard input 44 * onto standard output. Designed for use with Mail ~| 45 * 46 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ] 47 * Author: Kurt Shoens (UCB) 12/7/78 48 */ 49 50 #define NOSTR ((wchar_t *)0) /* Null string pointer for lint */ 51 #define MAXLINES 100 /* maximum mail header lines to verify */ 52 53 wchar_t outbuf[BUFSIZ]; /* Sandbagged output line image */ 54 wchar_t *outp; /* Pointer in above */ 55 int filler; /* Filler amount in outbuf */ 56 57 int pfx; /* Current leading blank count */ 58 int width = 72; /* Width that we will not exceed */ 59 int nojoin = 0; /* split lines only, don't join short ones */ 60 int errs = 0; /* Current number of errors */ 61 62 enum crown_type {c_none, c_reset, c_head, c_lead, c_fixup, c_body}; 63 enum crown_type crown_state; /* Crown margin state */ 64 int crown_head; /* The header offset */ 65 int crown_body; /* The body offset */ 66 /* currently-known initial strings found in mail headers */ 67 wchar_t *headnames[] = { 68 L"Apparently-To", L"Bcc", L"bcc", L"Cc", L"cc", L"Confirmed-By", 69 L"Content", L"content-length", L"From", L"Date", L"id", 70 L"Message-I", L"MIME-Version", L"Precedence", L"Return-Path", 71 L"Received", L"Reply-To", L"Status", L"Subject", L"To", L"X-IMAP", 72 L"X-Lines", L"X-Sender", L"X-Sun", L"X-Status", L"X-UID", 73 0}; 74 75 enum hdr_type { 76 off, /* mail header processing is off */ 77 not_in_hdr, /* not currently processing a mail header */ 78 in_hdr, /* currently filling hdrbuf with potential hdr lines */ 79 flush_hdr, /* flush hdrbuf; not a header, no special processing */ 80 do_hdr /* process hdrbuf as a mail header */ 81 }; 82 /* current state of hdrbuf */ 83 enum hdr_type hdr_state = not_in_hdr; 84 85 wchar_t *hdrbuf[MAXLINES]; /* buffer to hold potential mail header lines */ 86 int h_lines; /* index into lines of hdrbuf */ 87 88 void (*(split))(wchar_t []); 89 extern int scrwidth(wchar_t); 90 extern int ishead(char []); 91 92 93 static void fill_hdrbuf(wchar_t []); 94 static void header_chk(void); 95 static void process_hdrbuf(void); 96 static void leadin(void); 97 static void tabulate(wchar_t []); 98 static void oflush(void); 99 static void pack(wchar_t []); 100 static void msplit(wchar_t []); 101 static void csplit(wchar_t []); 102 static void _wckind_init(void); 103 static void prefix(wchar_t []); 104 static void fmt(FILE *); 105 static int setopt(char *); 106 int _wckind(wchar_t); 107 108 /* 109 * Drive the whole formatter by managing input files. Also, 110 * cause initialization of the output stuff and flush it out 111 * at the end. 112 */ 113 114 int 115 main(int argc, char **argv) 116 { 117 FILE *fi; 118 char sobuf[BUFSIZ]; 119 char *cp; 120 int nofile; 121 char *locale; 122 123 outp = NOSTR; 124 setbuf(stdout, sobuf); 125 setlocale(LC_ALL, ""); 126 locale = setlocale(LC_CTYPE, ""); 127 if (strcmp(locale, "C") == 0) { 128 split = csplit; 129 } else { 130 split = msplit; 131 _wckind_init(); 132 } 133 if (argc < 2) { 134 single: 135 fmt(stdin); 136 oflush(); 137 exit(0); 138 } 139 nofile = 1; 140 while (--argc) { 141 cp = *++argv; 142 if (setopt(cp)) 143 continue; 144 nofile = 0; 145 if ((fi = fopen(cp, "r")) == NULL) { 146 perror(cp); 147 errs++; 148 continue; 149 } 150 fmt(fi); 151 fclose(fi); 152 } 153 if (nofile) 154 goto single; 155 oflush(); 156 return (errs); 157 } 158 159 /* 160 * Read up characters from the passed input file, forming lines, 161 * doing ^H processing, expanding tabs, stripping trailing blanks, 162 * and sending each line down for analysis. 163 */ 164 165 static void 166 fmt(FILE *fi) 167 { 168 wchar_t linebuf[BUFSIZ], canonb[BUFSIZ]; 169 wchar_t *cp, *cp2; 170 int col; 171 wchar_t c; 172 char cbuf[BUFSIZ]; /* stores wchar_t string as char string */ 173 174 c = getwc(fi); 175 while (c != EOF) { 176 /* 177 * Collect a line, doing ^H processing. 178 * Leave tabs for now. 179 */ 180 181 cp = linebuf; 182 while (c != L'\n' && c != EOF && cp-linebuf < BUFSIZ-1) { 183 if (c == L'\b') { 184 if (cp > linebuf) 185 cp--; 186 c = getwc(fi); 187 continue; 188 } 189 if (!(iswprint(c)) && c != L'\t') { 190 c = getwc(fi); 191 continue; 192 } 193 *cp++ = c; 194 c = getwc(fi); 195 } 196 *cp = L'\0'; 197 198 /* 199 * Toss anything remaining on the input line. 200 */ 201 202 while (c != L'\n' && c != EOF) 203 c = getwc(fi); 204 /* 205 * Expand tabs on the way to canonb. 206 */ 207 208 col = 0; 209 cp = linebuf; 210 cp2 = canonb; 211 while (c = *cp++) { 212 if (c != L'\t') { 213 col += scrwidth(c); 214 if (cp2-canonb < BUFSIZ-1) 215 *cp2++ = c; 216 continue; 217 } 218 do { 219 if (cp2-canonb < BUFSIZ-1) 220 *cp2++ = L' '; 221 col++; 222 } while ((col & 07) != 0); 223 } 224 225 /* 226 * Swipe trailing blanks from the line. 227 */ 228 229 for (cp2--; cp2 >= canonb && *cp2 == L' '; cp2--); 230 *++cp2 = '\0'; 231 232 /* special processing to look for mail header lines */ 233 switch (hdr_state) { 234 case off: 235 prefix(canonb); 236 case not_in_hdr: 237 /* look for an initial mail header line */ 238 /* skip initial blanks */ 239 for (cp = canonb; *cp == L' '; cp++); 240 /* 241 * Need to convert string from wchar_t to char, 242 * since this is what ishead() expects. Since we 243 * only want to make sure cp points to a "From" line 244 * of the email, we don't have to alloc 245 * BUFSIZ * MB_LEN_MAX to cbuf. 246 */ 247 wcstombs(cbuf, cp, (BUFSIZ - 1)); 248 if (ishead(cbuf)) { 249 hdr_state = in_hdr; 250 fill_hdrbuf(canonb); 251 } else { 252 /* no mail header line; process normally */ 253 prefix(canonb); 254 } 255 break; 256 case in_hdr: 257 /* already saw 1st mail header line; look for more */ 258 if (canonb[0] == L'\0') { 259 /* 260 * blank line means end of mail header; 261 * verify current mail header buffer 262 * then process it accordingly 263 */ 264 header_chk(); 265 process_hdrbuf(); 266 /* now process the current blank line */ 267 prefix(canonb); 268 } else 269 /* 270 * not a blank line--save this line as 271 * a potential mail header line 272 */ 273 fill_hdrbuf(canonb); 274 break; 275 } 276 if (c != EOF) 277 c = getwc(fi); 278 } 279 /* 280 * end of this file--make sure we process the stuff in 281 * hdrbuf before we're finished 282 */ 283 if (hdr_state == in_hdr) { 284 header_chk(); 285 process_hdrbuf(); 286 } 287 } 288 289 /* 290 * Take a line devoid of tabs and other garbage and determine its 291 * blank prefix. If the indent changes, call for a linebreak. 292 * If the input line is blank, echo the blank line on the output. 293 * Finally, if the line minus the prefix is a mail header, try to keep 294 * it on a line by itself. 295 */ 296 297 static void 298 prefix(wchar_t line[]) 299 { 300 wchar_t *cp; 301 int np; 302 int nosplit = 0; /* flag set if line should not be split */ 303 304 if (line[0] == L'\0') { 305 oflush(); 306 putchar('\n'); 307 if (crown_state != c_none) 308 crown_state = c_reset; 309 return; 310 } 311 for (cp = line; *cp == L' '; cp++); 312 np = cp - line; 313 314 /* 315 * The following horrible expression attempts to avoid linebreaks 316 * when the indent changes due to a paragraph. 317 */ 318 319 if (crown_state == c_none && np != pfx && (np > pfx || abs(pfx-np) > 8)) 320 oflush(); 321 /* 322 * if this is a mail header line, don't split it; flush previous 323 * line, if any, so we don't join this line to it 324 */ 325 if (hdr_state == do_hdr) { 326 nosplit = 1; 327 oflush(); 328 } 329 /* flush previous line so we don't join this one to it */ 330 if (nojoin) 331 oflush(); 332 /* nroff-type lines starting with '.' are not split nor joined */ 333 if (!nosplit && (nosplit = (*cp == L'.'))) 334 oflush(); 335 pfx = np; 336 switch (crown_state) { 337 case c_reset: 338 crown_head = pfx; 339 crown_state = c_head; 340 break; 341 case c_lead: 342 crown_body = pfx; 343 crown_state = c_body; 344 break; 345 case c_fixup: 346 crown_body = pfx; 347 crown_state = c_body; 348 if (outp) { 349 wchar_t s[BUFSIZ]; 350 351 *outp = L'\0'; 352 wscpy(s, &outbuf[crown_head]); 353 outp = NOSTR; 354 split(s); 355 } 356 break; 357 } 358 if (nosplit) { 359 /* put whole input line onto outbuf and print it out */ 360 pack(cp); 361 oflush(); 362 } else 363 /* 364 * split puts current line onto outbuf, but splits it 365 * at word boundaries, if it exceeds desired length 366 */ 367 split(cp); 368 if (nojoin) 369 /* 370 * flush current line so next lines, if any, 371 * won't join to this one 372 */ 373 oflush(); 374 } 375 376 /* 377 * Split up the passed line into output "words" which are 378 * maximal strings of non-blanks with the blank separation 379 * attached at the end. Pass these words along to the output 380 * line packer. 381 */ 382 383 static void 384 csplit(wchar_t line[]) 385 { 386 wchar_t *cp, *cp2; 387 wchar_t word[BUFSIZ]; 388 static const wchar_t *srchlist = (const wchar_t *) L".:!?"; 389 390 cp = line; 391 while (*cp) { 392 cp2 = word; 393 394 /* 395 * Collect a 'word,' allowing it to contain escaped 396 * white space. 397 */ 398 399 while (*cp && !(iswspace(*cp))) { 400 if (*cp == '\\' && iswspace(cp[1])) 401 *cp2++ = *cp++; 402 *cp2++ = *cp++; 403 } 404 405 /* 406 * Guarantee a space at end of line. 407 * Two spaces after end of sentence punctuation. 408 */ 409 410 if (*cp == L'\0') { 411 *cp2++ = L' '; 412 if (wschr(srchlist, cp[-1]) != NULL) 413 *cp2++ = L' '; 414 } 415 while (iswspace(*cp)) 416 *cp2++ = *cp++; 417 *cp2 = L'\0'; 418 pack(word); 419 } 420 } 421 422 static void 423 msplit(wchar_t line[]) 424 { 425 wchar_t *cp, *cp2, prev; 426 wchar_t word[BUFSIZ]; 427 static const wchar_t *srchlist = (const wchar_t *) L".:!?"; 428 429 cp = line; 430 while (*cp) { 431 cp2 = word; 432 prev = *cp; 433 434 /* 435 * Collect a 'word,' allowing it to contain escaped 436 * white space. 437 */ 438 439 while (*cp) { 440 if (iswspace(*cp)) 441 break; 442 if (_wckind(*cp) != _wckind(prev)) 443 if (wcsetno(*cp) != 0 || wcsetno(prev) != 0) 444 break; 445 if (*cp == '\\' && iswspace(cp[1])) 446 *cp2++ = *cp++; 447 prev = *cp; 448 *cp2++ = *cp++; 449 } 450 451 /* 452 * Guarantee a space at end of line. 453 * Two spaces after end of sentence punctuation. 454 */ 455 456 if (*cp == L'\0') { 457 *cp2++ = L' '; 458 if (wschr(srchlist, cp[-1]) != NULL) 459 *cp2++ = L' '; 460 } 461 while (iswspace(*cp)) 462 *cp2++ = *cp++; 463 *cp2 = L'\0'; 464 pack(word); 465 } 466 } 467 468 /* 469 * Output section. 470 * Build up line images from the words passed in. Prefix 471 * each line with correct number of blanks. The buffer "outbuf" 472 * contains the current partial line image, including prefixed blanks. 473 * "outp" points to the next available space therein. When outp is NOSTR, 474 * there ain't nothing in there yet. At the bottom of this whole mess, 475 * leading tabs are reinserted. 476 */ 477 478 /* 479 * Pack a word onto the output line. If this is the beginning of 480 * the line, push on the appropriately-sized string of blanks first. 481 * If the word won't fit on the current line, flush and begin a new 482 * line. If the word is too long to fit all by itself on a line, 483 * just give it its own and hope for the best. 484 */ 485 486 static void 487 pack(wchar_t word[]) 488 { 489 wchar_t *cp; 490 int s, t; 491 492 if (outp == NOSTR) 493 leadin(); 494 t = wscol(word); 495 *outp = L'\0'; 496 s = wscol(outbuf); 497 if (t+s <= width) { 498 for (cp = word; *cp; *outp++ = *cp++); 499 return; 500 } 501 if (s > filler) { 502 oflush(); 503 leadin(); 504 } 505 for (cp = word; *cp; *outp++ = *cp++); 506 } 507 508 /* 509 * If there is anything on the current output line, send it on 510 * its way. Set outp to NOSTR to indicate the absence of the current 511 * line prefix. 512 */ 513 514 static void 515 oflush(void) 516 { 517 if (outp == NOSTR) 518 return; 519 *outp = L'\0'; 520 tabulate(outbuf); 521 outp = NOSTR; 522 } 523 524 /* 525 * Take the passed line buffer, insert leading tabs where possible, and 526 * output on standard output (finally). 527 */ 528 529 static void 530 tabulate(wchar_t line[]) 531 { 532 wchar_t *cp; 533 int b, t; 534 535 536 /* Toss trailing blanks in the output line */ 537 cp = line + wslen(line) - 1; 538 while (cp >= line && *cp == L' ') 539 cp--; 540 *++cp = L'\0'; 541 /* Count the leading blank space and tabulate */ 542 for (cp = line; *cp == L' '; cp++); 543 b = cp - line; 544 t = b >> 3; 545 b &= 07; 546 if (t > 0) 547 do 548 putc('\t', stdout); 549 while (--t); 550 if (b > 0) 551 do 552 putc(' ', stdout); 553 while (--b); 554 while (*cp) 555 putwc(*cp++, stdout); 556 putc('\n', stdout); 557 } 558 559 /* 560 * Initialize the output line with the appropriate number of 561 * leading blanks. 562 */ 563 564 static void 565 leadin(void) 566 { 567 int b; 568 wchar_t *cp; 569 int l; 570 571 switch (crown_state) { 572 case c_head: 573 l = crown_head; 574 crown_state = c_lead; 575 break; 576 577 case c_lead: 578 case c_fixup: 579 l = crown_head; 580 crown_state = c_fixup; 581 break; 582 583 case c_body: 584 l = crown_body; 585 break; 586 587 default: 588 l = pfx; 589 break; 590 } 591 filler = l; 592 for (b = 0, cp = outbuf; b < l; b++) 593 *cp++ = L' '; 594 outp = cp; 595 } 596 597 /* 598 * Is s1 a prefix of s2?? 599 */ 600 601 static int 602 ispref(wchar_t *s1, wchar_t *s2) 603 { 604 605 while (*s1 != L'\0' && *s2 != L'\0') 606 if (*s1++ != *s2++) 607 return (0); 608 return (1); 609 } 610 611 /* 612 * Set an input option 613 */ 614 615 static int 616 setopt(char *cp) 617 { 618 static int ws = 0; 619 620 if (*cp == '-') { 621 if (cp[1] == 'c' && cp[2] == '\0') { 622 crown_state = c_reset; 623 return (1); 624 } 625 if (cp[1] == 's' && cp[2] == '\0') { 626 nojoin = 1; 627 return (1); 628 } 629 if (cp[1] == 'w' && cp[2] == '\0') { 630 ws++; 631 return (1); 632 } 633 width = atoi(cp+1); 634 } else if (ws) { 635 width = atoi(cp); 636 ws = 0; 637 } else 638 return (0); 639 if (width <= 0 || width >= BUFSIZ-2) { 640 fprintf(stderr, "fmt: bad width: %d\n", width); 641 exit(1); 642 } 643 return (1); 644 } 645 646 647 #define LIB_WDRESOLVE "/usr/lib/locale/%s/LC_CTYPE/wdresolve.so" 648 #define WCHKIND "_wdchkind_" 649 650 static int _wckind_c_locale(wchar_t); 651 652 static int (*__wckind)(wchar_t) = _wckind_c_locale; 653 static void *dlhandle = NULL; 654 655 656 static void 657 _wckind_init(void) 658 { 659 char *locale; 660 char path[MAXPATHLEN + 1]; 661 662 663 if (dlhandle != NULL) { 664 (void) dlclose(dlhandle); 665 dlhandle = NULL; 666 } 667 668 locale = setlocale(LC_CTYPE, NULL); 669 if (strcmp(locale, "C") == 0) 670 goto c_locale; 671 672 (void) sprintf(path, LIB_WDRESOLVE, locale); 673 674 if ((dlhandle = dlopen(path, RTLD_LAZY)) != NULL) { 675 __wckind = (int (*)(wchar_t))dlsym(dlhandle, WCHKIND); 676 if (__wckind != NULL) 677 return; 678 (void) dlclose(dlhandle); 679 dlhandle = NULL; 680 } 681 682 c_locale: 683 __wckind = _wckind_c_locale; 684 } 685 686 687 int 688 _wckind(wchar_t wc) 689 { 690 return (*__wckind) (wc); 691 } 692 693 694 static int 695 _wckind_c_locale(wchar_t wc) 696 { 697 int ret; 698 699 /* 700 * DEPEND_ON_ANSIC: L notion for the character is new in 701 * ANSI-C, k&r compiler won't work. 702 */ 703 if (iswascii(wc)) 704 ret = (iswalnum(wc) || wc == L'_') ? 0 : 1; 705 else 706 ret = wcsetno(wc) + 1; 707 708 return (ret); 709 } 710 711 /* 712 * header_chk - 713 * Called when done looking for a set mail header lines. 714 * Either a blank line was seen, or EOF was reached. 715 * 716 * Verifies if current hdrbuf of potential mail header lines 717 * is really a mail header. A mail header must be at least 2 718 * lines and more than half of them must start with one of the 719 * known mail header strings in headnames. 720 * 721 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid 722 * mail header. Otherwise, it sets hdr_state to flush_hdr. 723 * 724 * h_lines = hdrbuf index for next line to be saved; 725 * also indicates current # of lines in potential header 726 */ 727 static void 728 header_chk(void) 729 { 730 wchar_t *cp; /* ptr to current char of line */ 731 wchar_t **hp; /* ptr to current char of a valid */ 732 /* mail header string */ 733 int l; /* index */ 734 /* 735 * number of lines in hdrbuf that look 736 * like mail header lines (start with 737 * a known mail header prefix) 738 */ 739 int hdrcount = 0; 740 /* header must have at least 2 lines (h_lines > 1) */ 741 if (h_lines < 2) { 742 hdr_state = flush_hdr; 743 return; 744 } 745 /* 746 * go through each line in hdrbuf and see how many 747 * look like mail header lines 748 */ 749 for (l = 0; l < h_lines; l++) { 750 /* skip initial blanks */ 751 for (cp = hdrbuf[l]; *cp == L' '; cp++); 752 for (hp = &headnames[0]; *hp != (wchar_t *)0; hp++) 753 if (ispref(*hp, cp)) { 754 hdrcount++; 755 break; 756 } 757 } 758 /* 759 * if over half match, we'll assume this is a header; 760 * set hdr_state to indicate whether to treat 761 * these lines as mail header (do_hdr) or not (flush_hdr) 762 */ 763 if (hdrcount > h_lines / 2) 764 hdr_state = do_hdr; 765 else 766 hdr_state = flush_hdr; 767 } 768 769 /* 770 * fill_hdrbuf - 771 * Save given input line into next element of hdrbuf, 772 * as a potential mail header line, to be processed later 773 * once we decide whether or not the contents of hdrbuf is 774 * really a mail header, via header_chk(). 775 * 776 * Does not allow hdrbuf to exceed MAXLINES lines. 777 * Dynamically allocates space for each line. If we are unable 778 * to allocate space for the current string, stop special mail 779 * header preservation at this point and continue formatting 780 * without it. 781 */ 782 static void 783 fill_hdrbuf(wchar_t line[]) 784 { 785 wchar_t *cp; /* pointer to characters in input line */ 786 int i; /* index into characters a hdrbuf line */ 787 788 if (h_lines >= MAXLINES) { 789 /* 790 * if we run over MAXLINES potential mail header 791 * lines, stop checking--this is most likely NOT a 792 * mail header; flush out the hdrbuf, then process 793 * the current 'line' normally. 794 */ 795 hdr_state = flush_hdr; 796 process_hdrbuf(); 797 prefix(line); 798 return; 799 } 800 hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) * 801 (wslen(line) + 1)); 802 if (hdrbuf[h_lines] == NULL) { 803 perror("malloc"); 804 fprintf(stderr, "fmt: unable to do mail header preservation\n"); 805 errs++; 806 /* 807 * Can't process mail header; flush current contents 808 * of mail header and continue with no more mail 809 * header processing 810 */ 811 if (h_lines == 0) 812 /* hdrbuf is empty; process this line normally */ 813 prefix(line); 814 else { 815 hdr_state = flush_hdr; 816 for (i = 0; i < h_lines; i++) { 817 prefix(hdrbuf[i]); 818 free(hdrbuf[i]); 819 } 820 h_lines = 0; 821 } 822 hdr_state = off; 823 return; 824 } 825 /* save this line as a potential mail header line */ 826 for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++); 827 h_lines++; 828 } 829 830 /* 831 * process_hdrbuf - 832 * Outputs the lines currently stored in hdrbuf, according 833 * to the current hdr_state value, assumed to be either do_hdr 834 * or flush_hdr. 835 * This should be called after doing a header_chk() to verify 836 * the hdrbuf and set the hdr_state flag. 837 */ 838 static void 839 process_hdrbuf(void) 840 { 841 int i; 842 843 for (i = 0; i < h_lines; i++) { 844 prefix(hdrbuf[i]); 845 free(hdrbuf[i]); 846 } 847 hdr_state = not_in_hdr; 848 h_lines = 0; 849 } 850