1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <ctype.h> 33 #include <wctype.h> 34 #include <widec.h> 35 #include <dlfcn.h> 36 #include <locale.h> 37 #include <sys/param.h> 38 #include <string.h> 39 40 /* 41 * fmt -- format the concatenation of input files or standard input 42 * onto standard output. Designed for use with Mail ~| 43 * 44 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ] 45 * Author: Kurt Shoens (UCB) 12/7/78 46 */ 47 48 #define NOSTR ((wchar_t *)0) /* Null string pointer for lint */ 49 #define MAXLINES 100 /* maximum mail header lines to verify */ 50 51 wchar_t outbuf[BUFSIZ]; /* Sandbagged output line image */ 52 wchar_t *outp; /* Pointer in above */ 53 int filler; /* Filler amount in outbuf */ 54 char sobuf[BUFSIZ]; /* Global buffer */ 55 56 int pfx; /* Current leading blank count */ 57 int width = 72; /* Width that we will not exceed */ 58 int nojoin = 0; /* split lines only, don't join short ones */ 59 int errs = 0; /* Current number of errors */ 60 61 enum crown_type {c_none, c_reset, c_head, c_lead, c_fixup, c_body}; 62 enum crown_type crown_state; /* Crown margin state */ 63 int crown_head; /* The header offset */ 64 int crown_body; /* The body offset */ 65 /* currently-known initial strings found in mail headers */ 66 wchar_t *headnames[] = { 67 L"Apparently-To", L"Bcc", L"bcc", L"Cc", L"cc", L"Confirmed-By", 68 L"Content", L"content-length", L"From", L"Date", L"id", 69 L"Message-I", L"MIME-Version", L"Precedence", L"Return-Path", 70 L"Received", L"Reply-To", L"Status", L"Subject", L"To", L"X-IMAP", 71 L"X-Lines", L"X-Sender", L"X-Sun", L"X-Status", L"X-UID", 72 0}; 73 74 enum hdr_type { 75 off, /* mail header processing is off */ 76 not_in_hdr, /* not currently processing a mail header */ 77 in_hdr, /* currently filling hdrbuf with potential hdr lines */ 78 flush_hdr, /* flush hdrbuf; not a header, no special processing */ 79 do_hdr /* process hdrbuf as a mail header */ 80 }; 81 /* current state of hdrbuf */ 82 enum hdr_type hdr_state = not_in_hdr; 83 84 wchar_t *hdrbuf[MAXLINES]; /* buffer to hold potential mail header lines */ 85 int h_lines; /* index into lines of hdrbuf */ 86 87 void (*(split))(wchar_t []); 88 extern int scrwidth(wchar_t); 89 extern int ishead(char []); 90 91 92 static void fill_hdrbuf(wchar_t []); 93 static void header_chk(void); 94 static void process_hdrbuf(void); 95 static void leadin(void); 96 static void tabulate(wchar_t []); 97 static void oflush(void); 98 static void pack(wchar_t []); 99 static void msplit(wchar_t []); 100 static void csplit(wchar_t []); 101 static void _wckind_init(void); 102 static void prefix(wchar_t []); 103 static void fmt(FILE *); 104 static int setopt(char *); 105 int _wckind(wchar_t); 106 107 /* 108 * Drive the whole formatter by managing input files. Also, 109 * cause initialization of the output stuff and flush it out 110 * at the end. 111 */ 112 113 int 114 main(int argc, char **argv) 115 { 116 FILE *fi; 117 char *cp; 118 int nofile; 119 char *locale; 120 121 outp = NOSTR; 122 setbuf(stdout, sobuf); 123 setlocale(LC_ALL, ""); 124 locale = setlocale(LC_CTYPE, ""); 125 if (strcmp(locale, "C") == 0) { 126 split = csplit; 127 } else { 128 split = msplit; 129 _wckind_init(); 130 } 131 if (argc < 2) { 132 single: 133 fmt(stdin); 134 oflush(); 135 exit(0); 136 } 137 nofile = 1; 138 while (--argc) { 139 cp = *++argv; 140 if (setopt(cp)) 141 continue; 142 nofile = 0; 143 if ((fi = fopen(cp, "r")) == NULL) { 144 perror(cp); 145 errs++; 146 continue; 147 } 148 fmt(fi); 149 fclose(fi); 150 } 151 if (nofile) 152 goto single; 153 oflush(); 154 fclose(stdout); 155 return (errs); 156 } 157 158 /* 159 * Read up characters from the passed input file, forming lines, 160 * doing ^H processing, expanding tabs, stripping trailing blanks, 161 * and sending each line down for analysis. 162 */ 163 164 static void 165 fmt(FILE *fi) 166 { 167 wchar_t linebuf[BUFSIZ], canonb[BUFSIZ]; 168 wchar_t *cp, *cp2; 169 int col; 170 wchar_t c; 171 char cbuf[BUFSIZ]; /* stores wchar_t string as char string */ 172 173 c = getwc(fi); 174 while (c != EOF) { 175 /* 176 * Collect a line, doing ^H processing. 177 * Leave tabs for now. 178 */ 179 180 cp = linebuf; 181 while (c != L'\n' && c != EOF && cp-linebuf < BUFSIZ-1) { 182 if (c == L'\b') { 183 if (cp > linebuf) 184 cp--; 185 c = getwc(fi); 186 continue; 187 } 188 if (!(iswprint(c)) && c != L'\t') { 189 c = getwc(fi); 190 continue; 191 } 192 *cp++ = c; 193 c = getwc(fi); 194 } 195 *cp = L'\0'; 196 197 /* 198 * Toss anything remaining on the input line. 199 */ 200 201 while (c != L'\n' && c != EOF) 202 c = getwc(fi); 203 /* 204 * Expand tabs on the way to canonb. 205 */ 206 207 col = 0; 208 cp = linebuf; 209 cp2 = canonb; 210 while (c = *cp++) { 211 if (c != L'\t') { 212 col += scrwidth(c); 213 if (cp2-canonb < BUFSIZ-1) 214 *cp2++ = c; 215 continue; 216 } 217 do { 218 if (cp2-canonb < BUFSIZ-1) 219 *cp2++ = L' '; 220 col++; 221 } while ((col & 07) != 0); 222 } 223 224 /* 225 * Swipe trailing blanks from the line. 226 */ 227 228 for (cp2--; cp2 >= canonb && *cp2 == L' '; cp2--) { 229 } 230 *++cp2 = '\0'; 231 232 /* special processing to look for mail header lines */ 233 switch (hdr_state) { 234 case off: 235 prefix(canonb); 236 case not_in_hdr: 237 /* look for an initial mail header line */ 238 /* skip initial blanks */ 239 for (cp = canonb; *cp == L' '; cp++) { 240 } 241 /* 242 * Need to convert string from wchar_t to char, 243 * since this is what ishead() expects. Since we 244 * only want to make sure cp points to a "From" line 245 * of the email, we don't have to alloc 246 * BUFSIZ * MB_LEN_MAX to cbuf. 247 */ 248 wcstombs(cbuf, cp, (BUFSIZ - 1)); 249 if (ishead(cbuf)) { 250 hdr_state = in_hdr; 251 fill_hdrbuf(canonb); 252 } else { 253 /* no mail header line; process normally */ 254 prefix(canonb); 255 } 256 break; 257 case in_hdr: 258 /* already saw 1st mail header line; look for more */ 259 if (canonb[0] == L'\0') { 260 /* 261 * blank line means end of mail header; 262 * verify current mail header buffer 263 * then process it accordingly 264 */ 265 header_chk(); 266 process_hdrbuf(); 267 /* now process the current blank line */ 268 prefix(canonb); 269 } else 270 /* 271 * not a blank line--save this line as 272 * a potential mail header line 273 */ 274 fill_hdrbuf(canonb); 275 break; 276 } 277 if (c != EOF) 278 c = getwc(fi); 279 } 280 /* 281 * end of this file--make sure we process the stuff in 282 * hdrbuf before we're finished 283 */ 284 if (hdr_state == in_hdr) { 285 header_chk(); 286 process_hdrbuf(); 287 } 288 } 289 290 /* 291 * Take a line devoid of tabs and other garbage and determine its 292 * blank prefix. If the indent changes, call for a linebreak. 293 * If the input line is blank, echo the blank line on the output. 294 * Finally, if the line minus the prefix is a mail header, try to keep 295 * it on a line by itself. 296 */ 297 298 static void 299 prefix(wchar_t line[]) 300 { 301 wchar_t *cp; 302 int np; 303 int nosplit = 0; /* flag set if line should not be split */ 304 305 if (line[0] == L'\0') { 306 oflush(); 307 putchar('\n'); 308 if (crown_state != c_none) 309 crown_state = c_reset; 310 return; 311 } 312 for (cp = line; *cp == L' '; cp++) { 313 } 314 np = cp - line; 315 316 /* 317 * The following horrible expression attempts to avoid linebreaks 318 * when the indent changes due to a paragraph. 319 */ 320 321 if (crown_state == c_none && np != pfx && (np > pfx || abs(pfx-np) > 8)) 322 oflush(); 323 /* 324 * if this is a mail header line, don't split it; flush previous 325 * line, if any, so we don't join this line to it 326 */ 327 if (hdr_state == do_hdr) { 328 nosplit = 1; 329 oflush(); 330 } 331 /* flush previous line so we don't join this one to it */ 332 if (nojoin) 333 oflush(); 334 /* nroff-type lines starting with '.' are not split nor joined */ 335 if (!nosplit && (nosplit = (*cp == L'.'))) 336 oflush(); 337 pfx = np; 338 switch (crown_state) { 339 case c_reset: 340 crown_head = pfx; 341 crown_state = c_head; 342 break; 343 case c_lead: 344 crown_body = pfx; 345 crown_state = c_body; 346 break; 347 case c_fixup: 348 crown_body = pfx; 349 crown_state = c_body; 350 if (outp) { 351 wchar_t s[BUFSIZ]; 352 353 *outp = L'\0'; 354 wscpy(s, &outbuf[crown_head]); 355 outp = NOSTR; 356 split(s); 357 } 358 break; 359 } 360 if (nosplit) { 361 /* put whole input line onto outbuf and print it out */ 362 pack(cp); 363 oflush(); 364 } else 365 /* 366 * split puts current line onto outbuf, but splits it 367 * at word boundaries, if it exceeds desired length 368 */ 369 split(cp); 370 if (nojoin) 371 /* 372 * flush current line so next lines, if any, 373 * won't join to this one 374 */ 375 oflush(); 376 } 377 378 /* 379 * Split up the passed line into output "words" which are 380 * maximal strings of non-blanks with the blank separation 381 * attached at the end. Pass these words along to the output 382 * line packer. 383 */ 384 385 static void 386 csplit(wchar_t line[]) 387 { 388 wchar_t *cp, *cp2; 389 wchar_t word[BUFSIZ]; 390 static const wchar_t *srchlist = (const wchar_t *) L".:!?"; 391 392 cp = line; 393 while (*cp) { 394 cp2 = word; 395 396 /* 397 * Collect a 'word,' allowing it to contain escaped 398 * white space. 399 */ 400 401 while (*cp && !(iswspace(*cp))) { 402 if (*cp == '\\' && iswspace(cp[1])) 403 *cp2++ = *cp++; 404 *cp2++ = *cp++; 405 } 406 407 /* 408 * Guarantee a space at end of line. 409 * Two spaces after end of sentence punctuation. 410 */ 411 412 if (*cp == L'\0') { 413 *cp2++ = L' '; 414 if (wschr(srchlist, cp[-1]) != NULL) 415 *cp2++ = L' '; 416 } 417 while (iswspace(*cp)) 418 *cp2++ = *cp++; 419 *cp2 = L'\0'; 420 pack(word); 421 } 422 } 423 424 static void 425 msplit(wchar_t line[]) 426 { 427 wchar_t *cp, *cp2, prev; 428 wchar_t word[BUFSIZ]; 429 static const wchar_t *srchlist = (const wchar_t *) L".:!?"; 430 431 cp = line; 432 while (*cp) { 433 cp2 = word; 434 prev = *cp; 435 436 /* 437 * Collect a 'word,' allowing it to contain escaped 438 * white space. 439 */ 440 441 while (*cp) { 442 if (iswspace(*cp)) 443 break; 444 if (_wckind(*cp) != _wckind(prev)) 445 if (wcsetno(*cp) != 0 || wcsetno(prev) != 0) 446 break; 447 if (*cp == '\\' && iswspace(cp[1])) 448 *cp2++ = *cp++; 449 prev = *cp; 450 *cp2++ = *cp++; 451 } 452 453 /* 454 * Guarantee a space at end of line. 455 * Two spaces after end of sentence punctuation. 456 */ 457 458 if (*cp == L'\0') { 459 *cp2++ = L' '; 460 if (wschr(srchlist, cp[-1]) != NULL) 461 *cp2++ = L' '; 462 } 463 while (iswspace(*cp)) 464 *cp2++ = *cp++; 465 *cp2 = L'\0'; 466 pack(word); 467 } 468 } 469 470 /* 471 * Output section. 472 * Build up line images from the words passed in. Prefix 473 * each line with correct number of blanks. The buffer "outbuf" 474 * contains the current partial line image, including prefixed blanks. 475 * "outp" points to the next available space therein. When outp is NOSTR, 476 * there ain't nothing in there yet. At the bottom of this whole mess, 477 * leading tabs are reinserted. 478 */ 479 480 /* 481 * Pack a word onto the output line. If this is the beginning of 482 * the line, push on the appropriately-sized string of blanks first. 483 * If the word won't fit on the current line, flush and begin a new 484 * line. If the word is too long to fit all by itself on a line, 485 * just give it its own and hope for the best. 486 */ 487 488 static void 489 pack(wchar_t word[]) 490 { 491 wchar_t *cp; 492 int s, t; 493 494 if (outp == NOSTR) 495 leadin(); 496 t = wscol(word); 497 *outp = L'\0'; 498 s = wscol(outbuf); 499 if (t+s <= width) { 500 for (cp = word; *cp; *outp++ = *cp++) { 501 } 502 return; 503 } 504 if (s > filler) { 505 oflush(); 506 leadin(); 507 } 508 for (cp = word; *cp; *outp++ = *cp++) { 509 } 510 } 511 512 /* 513 * If there is anything on the current output line, send it on 514 * its way. Set outp to NOSTR to indicate the absence of the current 515 * line prefix. 516 */ 517 518 static void 519 oflush(void) 520 { 521 if (outp == NOSTR) 522 return; 523 *outp = L'\0'; 524 tabulate(outbuf); 525 outp = NOSTR; 526 } 527 528 /* 529 * Take the passed line buffer, insert leading tabs where possible, and 530 * output on standard output (finally). 531 */ 532 533 static void 534 tabulate(wchar_t line[]) 535 { 536 wchar_t *cp; 537 int b, t; 538 539 540 /* Toss trailing blanks in the output line */ 541 cp = line + wslen(line) - 1; 542 while (cp >= line && *cp == L' ') 543 cp--; 544 *++cp = L'\0'; 545 /* Count the leading blank space and tabulate */ 546 for (cp = line; *cp == L' '; cp++) { 547 } 548 b = cp - line; 549 t = b >> 3; 550 b &= 07; 551 if (t > 0) 552 do { 553 putc('\t', stdout); 554 } while (--t); 555 if (b > 0) 556 do { 557 putc(' ', stdout); 558 } while (--b); 559 while (*cp) 560 putwc(*cp++, stdout); 561 putc('\n', stdout); 562 } 563 564 /* 565 * Initialize the output line with the appropriate number of 566 * leading blanks. 567 */ 568 569 static void 570 leadin(void) 571 { 572 int b; 573 wchar_t *cp; 574 int l; 575 576 switch (crown_state) { 577 case c_head: 578 l = crown_head; 579 crown_state = c_lead; 580 break; 581 582 case c_lead: 583 case c_fixup: 584 l = crown_head; 585 crown_state = c_fixup; 586 break; 587 588 case c_body: 589 l = crown_body; 590 break; 591 592 default: 593 l = pfx; 594 break; 595 } 596 filler = l; 597 for (b = 0, cp = outbuf; b < l; b++) 598 *cp++ = L' '; 599 outp = cp; 600 } 601 602 /* 603 * Is s1 a prefix of s2?? 604 */ 605 606 static int 607 ispref(wchar_t *s1, wchar_t *s2) 608 { 609 610 while (*s1 != L'\0' && *s2 != L'\0') 611 if (*s1++ != *s2++) 612 return (0); 613 return (1); 614 } 615 616 /* 617 * Set an input option 618 */ 619 620 static int 621 setopt(char *cp) 622 { 623 static int ws = 0; 624 625 if (*cp == '-') { 626 if (cp[1] == 'c' && cp[2] == '\0') { 627 crown_state = c_reset; 628 return (1); 629 } 630 if (cp[1] == 's' && cp[2] == '\0') { 631 nojoin = 1; 632 return (1); 633 } 634 if (cp[1] == 'w' && cp[2] == '\0') { 635 ws++; 636 return (1); 637 } 638 width = atoi(cp+1); 639 } else if (ws) { 640 width = atoi(cp); 641 ws = 0; 642 } else 643 return (0); 644 if (width <= 0 || width >= BUFSIZ-2) { 645 fprintf(stderr, "fmt: bad width: %d\n", width); 646 exit(1); 647 } 648 return (1); 649 } 650 651 652 #define LIB_WDRESOLVE "/usr/lib/locale/%s/LC_CTYPE/wdresolve.so" 653 #define WCHKIND "_wdchkind_" 654 655 static int _wckind_c_locale(wchar_t); 656 657 static int (*__wckind)(wchar_t) = _wckind_c_locale; 658 static void *dlhandle = NULL; 659 660 661 static void 662 _wckind_init(void) 663 { 664 char *locale; 665 char path[MAXPATHLEN + 1]; 666 667 668 if (dlhandle != NULL) { 669 (void) dlclose(dlhandle); 670 dlhandle = NULL; 671 } 672 673 locale = setlocale(LC_CTYPE, NULL); 674 if (strcmp(locale, "C") == 0) 675 goto c_locale; 676 677 (void) sprintf(path, LIB_WDRESOLVE, locale); 678 679 if ((dlhandle = dlopen(path, RTLD_LAZY)) != NULL) { 680 __wckind = (int (*)(wchar_t))dlsym(dlhandle, WCHKIND); 681 if (__wckind != NULL) 682 return; 683 (void) dlclose(dlhandle); 684 dlhandle = NULL; 685 } 686 687 c_locale: 688 __wckind = _wckind_c_locale; 689 } 690 691 692 int 693 _wckind(wchar_t wc) 694 { 695 return (*__wckind) (wc); 696 } 697 698 699 static int 700 _wckind_c_locale(wchar_t wc) 701 { 702 int ret; 703 704 /* 705 * DEPEND_ON_ANSIC: L notion for the character is new in 706 * ANSI-C, k&r compiler won't work. 707 */ 708 if (iswascii(wc)) 709 ret = (iswalnum(wc) || wc == L'_') ? 0 : 1; 710 else 711 ret = wcsetno(wc) + 1; 712 713 return (ret); 714 } 715 716 /* 717 * header_chk - 718 * Called when done looking for a set mail header lines. 719 * Either a blank line was seen, or EOF was reached. 720 * 721 * Verifies if current hdrbuf of potential mail header lines 722 * is really a mail header. A mail header must be at least 2 723 * lines and more than half of them must start with one of the 724 * known mail header strings in headnames. 725 * 726 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid 727 * mail header. Otherwise, it sets hdr_state to flush_hdr. 728 * 729 * h_lines = hdrbuf index for next line to be saved; 730 * also indicates current # of lines in potential header 731 */ 732 static void 733 header_chk(void) 734 { 735 wchar_t *cp; /* ptr to current char of line */ 736 wchar_t **hp; /* ptr to current char of a valid */ 737 /* mail header string */ 738 int l; /* index */ 739 /* 740 * number of lines in hdrbuf that look 741 * like mail header lines (start with 742 * a known mail header prefix) 743 */ 744 int hdrcount = 0; 745 /* header must have at least 2 lines (h_lines > 1) */ 746 if (h_lines < 2) { 747 hdr_state = flush_hdr; 748 return; 749 } 750 /* 751 * go through each line in hdrbuf and see how many 752 * look like mail header lines 753 */ 754 for (l = 0; l < h_lines; l++) { 755 /* skip initial blanks */ 756 for (cp = hdrbuf[l]; *cp == L' '; cp++) { 757 } 758 for (hp = &headnames[0]; *hp != (wchar_t *)0; hp++) 759 if (ispref(*hp, cp)) { 760 hdrcount++; 761 break; 762 } 763 } 764 /* 765 * if over half match, we'll assume this is a header; 766 * set hdr_state to indicate whether to treat 767 * these lines as mail header (do_hdr) or not (flush_hdr) 768 */ 769 if (hdrcount > h_lines / 2) 770 hdr_state = do_hdr; 771 else 772 hdr_state = flush_hdr; 773 } 774 775 /* 776 * fill_hdrbuf - 777 * Save given input line into next element of hdrbuf, 778 * as a potential mail header line, to be processed later 779 * once we decide whether or not the contents of hdrbuf is 780 * really a mail header, via header_chk(). 781 * 782 * Does not allow hdrbuf to exceed MAXLINES lines. 783 * Dynamically allocates space for each line. If we are unable 784 * to allocate space for the current string, stop special mail 785 * header preservation at this point and continue formatting 786 * without it. 787 */ 788 static void 789 fill_hdrbuf(wchar_t line[]) 790 { 791 wchar_t *cp; /* pointer to characters in input line */ 792 int i; /* index into characters a hdrbuf line */ 793 794 if (h_lines >= MAXLINES) { 795 /* 796 * if we run over MAXLINES potential mail header 797 * lines, stop checking--this is most likely NOT a 798 * mail header; flush out the hdrbuf, then process 799 * the current 'line' normally. 800 */ 801 hdr_state = flush_hdr; 802 process_hdrbuf(); 803 prefix(line); 804 return; 805 } 806 hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) * 807 (wslen(line) + 1)); 808 if (hdrbuf[h_lines] == NULL) { 809 perror("malloc"); 810 fprintf(stderr, "fmt: unable to do mail header preservation\n"); 811 errs++; 812 /* 813 * Can't process mail header; flush current contents 814 * of mail header and continue with no more mail 815 * header processing 816 */ 817 if (h_lines == 0) 818 /* hdrbuf is empty; process this line normally */ 819 prefix(line); 820 else { 821 hdr_state = flush_hdr; 822 for (i = 0; i < h_lines; i++) { 823 prefix(hdrbuf[i]); 824 free(hdrbuf[i]); 825 } 826 h_lines = 0; 827 } 828 hdr_state = off; 829 return; 830 } 831 /* save this line as a potential mail header line */ 832 for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++) { 833 } 834 h_lines++; 835 } 836 837 /* 838 * process_hdrbuf - 839 * Outputs the lines currently stored in hdrbuf, according 840 * to the current hdr_state value, assumed to be either do_hdr 841 * or flush_hdr. 842 * This should be called after doing a header_chk() to verify 843 * the hdrbuf and set the hdr_state flag. 844 */ 845 static void 846 process_hdrbuf(void) 847 { 848 int i; 849 850 for (i = 0; i < h_lines; i++) { 851 prefix(hdrbuf[i]); 852 free(hdrbuf[i]); 853 } 854 hdr_state = not_in_hdr; 855 h_lines = 0; 856 } 857