/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ #include #include #include #include #include #include #include #include #include /* * fmt -- format the concatenation of input files or standard input * onto standard output. Designed for use with Mail ~| * * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ] * Author: Kurt Shoens (UCB) 12/7/78 */ #define NOSTR ((wchar_t *)0) /* Null string pointer for lint */ #define MAXLINES 100 /* maximum mail header lines to verify */ wchar_t outbuf[BUFSIZ]; /* Sandbagged output line image */ wchar_t *outp; /* Pointer in above */ int filler; /* Filler amount in outbuf */ char sobuf[BUFSIZ]; /* Global buffer */ int pfx; /* Current leading blank count */ int width = 72; /* Width that we will not exceed */ int nojoin = 0; /* split lines only, don't join short ones */ int errs = 0; /* Current number of errors */ enum crown_type {c_none, c_reset, c_head, c_lead, c_fixup, c_body}; enum crown_type crown_state; /* Crown margin state */ int crown_head; /* The header offset */ int crown_body; /* The body offset */ /* currently-known initial strings found in mail headers */ wchar_t *headnames[] = { L"Apparently-To", L"Bcc", L"bcc", L"Cc", L"cc", L"Confirmed-By", L"Content", L"content-length", L"From", L"Date", L"id", L"Message-I", L"MIME-Version", L"Precedence", L"Return-Path", L"Received", L"Reply-To", L"Status", L"Subject", L"To", L"X-IMAP", L"X-Lines", L"X-Sender", L"X-Sun", L"X-Status", L"X-UID", 0}; enum hdr_type { off, /* mail header processing is off */ not_in_hdr, /* not currently processing a mail header */ in_hdr, /* currently filling hdrbuf with potential hdr lines */ flush_hdr, /* flush hdrbuf; not a header, no special processing */ do_hdr /* process hdrbuf as a mail header */ }; /* current state of hdrbuf */ enum hdr_type hdr_state = not_in_hdr; wchar_t *hdrbuf[MAXLINES]; /* buffer to hold potential mail header lines */ int h_lines; /* index into lines of hdrbuf */ void (*(split))(wchar_t []); extern int scrwidth(wchar_t); extern boolean_t is_headline(const char *); static void fill_hdrbuf(wchar_t []); static void header_chk(void); static void process_hdrbuf(void); static void leadin(void); static void tabulate(wchar_t []); static void oflush(void); static void pack(wchar_t []); static void msplit(wchar_t []); static void csplit(wchar_t []); static void _wckind_init(void); static void prefix(wchar_t []); static void fmt(FILE *); static int setopt(char *); int _wckind(wchar_t); /* * Drive the whole formatter by managing input files. Also, * cause initialization of the output stuff and flush it out * at the end. */ int main(int argc, char **argv) { FILE *fi; char *cp; int nofile; char *locale; outp = NOSTR; setbuf(stdout, sobuf); setlocale(LC_ALL, ""); locale = setlocale(LC_CTYPE, ""); if (strcmp(locale, "C") == 0) { split = csplit; } else { split = msplit; _wckind_init(); } if (argc < 2) { single: fmt(stdin); oflush(); exit(0); } nofile = 1; while (--argc) { cp = *++argv; if (setopt(cp)) continue; nofile = 0; if ((fi = fopen(cp, "r")) == NULL) { perror(cp); errs++; continue; } fmt(fi); fclose(fi); } if (nofile) goto single; oflush(); fclose(stdout); return (errs); } /* * Read up characters from the passed input file, forming lines, * doing ^H processing, expanding tabs, stripping trailing blanks, * and sending each line down for analysis. */ static void fmt(FILE *fi) { wchar_t linebuf[BUFSIZ], canonb[BUFSIZ]; wchar_t *cp, *cp2; int col; wchar_t c; char cbuf[BUFSIZ]; /* stores wchar_t string as char string */ c = getwc(fi); while (c != EOF) { /* * Collect a line, doing ^H processing. * Leave tabs for now. */ cp = linebuf; while (c != L'\n' && c != EOF && cp-linebuf < BUFSIZ-1) { if (c == L'\b') { if (cp > linebuf) cp--; c = getwc(fi); continue; } if (!(iswprint(c)) && c != L'\t') { c = getwc(fi); continue; } *cp++ = c; c = getwc(fi); } *cp = L'\0'; /* * Toss anything remaining on the input line. */ while (c != L'\n' && c != EOF) c = getwc(fi); /* * Expand tabs on the way to canonb. */ col = 0; cp = linebuf; cp2 = canonb; while (c = *cp++) { if (c != L'\t') { col += scrwidth(c); if (cp2-canonb < BUFSIZ-1) *cp2++ = c; continue; } do { if (cp2-canonb < BUFSIZ-1) *cp2++ = L' '; col++; } while ((col & 07) != 0); } /* * Swipe trailing blanks from the line. */ for (cp2--; cp2 >= canonb && *cp2 == L' '; cp2--) { } *++cp2 = '\0'; /* special processing to look for mail header lines */ switch (hdr_state) { case off: prefix(canonb); case not_in_hdr: /* look for an initial mail header line */ /* skip initial blanks */ for (cp = canonb; *cp == L' '; cp++) { } /* * Need to convert string from wchar_t to char, * since this is what is_headline() expects. Since we * only want to make sure cp points to a "From" line * of the email, we don't have to alloc * BUFSIZ * MB_LEN_MAX to cbuf. */ wcstombs(cbuf, cp, (BUFSIZ - 1)); if (is_headline(cbuf) == B_TRUE) { hdr_state = in_hdr; fill_hdrbuf(canonb); } else { /* no mail header line; process normally */ prefix(canonb); } break; case in_hdr: /* already saw 1st mail header line; look for more */ if (canonb[0] == L'\0') { /* * blank line means end of mail header; * verify current mail header buffer * then process it accordingly */ header_chk(); process_hdrbuf(); /* now process the current blank line */ prefix(canonb); } else /* * not a blank line--save this line as * a potential mail header line */ fill_hdrbuf(canonb); break; } if (c != EOF) c = getwc(fi); } /* * end of this file--make sure we process the stuff in * hdrbuf before we're finished */ if (hdr_state == in_hdr) { header_chk(); process_hdrbuf(); } } /* * Take a line devoid of tabs and other garbage and determine its * blank prefix. If the indent changes, call for a linebreak. * If the input line is blank, echo the blank line on the output. * Finally, if the line minus the prefix is a mail header, try to keep * it on a line by itself. */ static void prefix(wchar_t line[]) { wchar_t *cp; int np; int nosplit = 0; /* flag set if line should not be split */ if (line[0] == L'\0') { oflush(); putchar('\n'); if (crown_state != c_none) crown_state = c_reset; return; } for (cp = line; *cp == L' '; cp++) { } np = cp - line; /* * The following horrible expression attempts to avoid linebreaks * when the indent changes due to a paragraph. */ if (crown_state == c_none && np != pfx && (np > pfx || abs(pfx-np) > 8)) oflush(); /* * if this is a mail header line, don't split it; flush previous * line, if any, so we don't join this line to it */ if (hdr_state == do_hdr) { nosplit = 1; oflush(); } /* flush previous line so we don't join this one to it */ if (nojoin) oflush(); /* nroff-type lines starting with '.' are not split nor joined */ if (!nosplit && (nosplit = (*cp == L'.'))) oflush(); pfx = np; switch (crown_state) { case c_reset: crown_head = pfx; crown_state = c_head; break; case c_lead: crown_body = pfx; crown_state = c_body; break; case c_fixup: crown_body = pfx; crown_state = c_body; if (outp) { wchar_t s[BUFSIZ]; *outp = L'\0'; wscpy(s, &outbuf[crown_head]); outp = NOSTR; split(s); } break; } if (nosplit) { /* put whole input line onto outbuf and print it out */ pack(cp); oflush(); } else /* * split puts current line onto outbuf, but splits it * at word boundaries, if it exceeds desired length */ split(cp); if (nojoin) /* * flush current line so next lines, if any, * won't join to this one */ oflush(); } /* * Split up the passed line into output "words" which are * maximal strings of non-blanks with the blank separation * attached at the end. Pass these words along to the output * line packer. */ static void csplit(wchar_t line[]) { wchar_t *cp, *cp2; wchar_t word[BUFSIZ]; static const wchar_t *srchlist = (const wchar_t *) L".:!?"; cp = line; while (*cp) { cp2 = word; /* * Collect a 'word,' allowing it to contain escaped * white space. */ while (*cp && !(iswspace(*cp))) { if (*cp == '\\' && iswspace(cp[1])) *cp2++ = *cp++; *cp2++ = *cp++; } /* * Guarantee a space at end of line. * Two spaces after end of sentence punctuation. */ if (*cp == L'\0') { *cp2++ = L' '; if (wschr(srchlist, cp[-1]) != NULL) *cp2++ = L' '; } while (iswspace(*cp)) *cp2++ = *cp++; *cp2 = L'\0'; pack(word); } } static void msplit(wchar_t line[]) { wchar_t *cp, *cp2, prev; wchar_t word[BUFSIZ]; static const wchar_t *srchlist = (const wchar_t *) L".:!?"; cp = line; while (*cp) { cp2 = word; prev = *cp; /* * Collect a 'word,' allowing it to contain escaped * white space. */ while (*cp) { if (iswspace(*cp)) break; if (_wckind(*cp) != _wckind(prev)) if (wcsetno(*cp) != 0 || wcsetno(prev) != 0) break; if (*cp == '\\' && iswspace(cp[1])) *cp2++ = *cp++; prev = *cp; *cp2++ = *cp++; } /* * Guarantee a space at end of line. * Two spaces after end of sentence punctuation. */ if (*cp == L'\0') { *cp2++ = L' '; if (wschr(srchlist, cp[-1]) != NULL) *cp2++ = L' '; } while (iswspace(*cp)) *cp2++ = *cp++; *cp2 = L'\0'; pack(word); } } /* * Output section. * Build up line images from the words passed in. Prefix * each line with correct number of blanks. The buffer "outbuf" * contains the current partial line image, including prefixed blanks. * "outp" points to the next available space therein. When outp is NOSTR, * there ain't nothing in there yet. At the bottom of this whole mess, * leading tabs are reinserted. */ /* * Pack a word onto the output line. If this is the beginning of * the line, push on the appropriately-sized string of blanks first. * If the word won't fit on the current line, flush and begin a new * line. If the word is too long to fit all by itself on a line, * just give it its own and hope for the best. */ static void pack(wchar_t word[]) { wchar_t *cp; int s, t; if (outp == NOSTR) leadin(); t = wscol(word); *outp = L'\0'; s = wscol(outbuf); if (t+s <= width) { for (cp = word; *cp; *outp++ = *cp++) { } return; } if (s > filler) { oflush(); leadin(); } for (cp = word; *cp; *outp++ = *cp++) { } } /* * If there is anything on the current output line, send it on * its way. Set outp to NOSTR to indicate the absence of the current * line prefix. */ static void oflush(void) { if (outp == NOSTR) return; *outp = L'\0'; tabulate(outbuf); outp = NOSTR; } /* * Take the passed line buffer, insert leading tabs where possible, and * output on standard output (finally). */ static void tabulate(wchar_t line[]) { wchar_t *cp; int b, t; /* Toss trailing blanks in the output line */ cp = line + wslen(line) - 1; while (cp >= line && *cp == L' ') cp--; *++cp = L'\0'; /* Count the leading blank space and tabulate */ for (cp = line; *cp == L' '; cp++) { } b = cp - line; t = b >> 3; b &= 07; if (t > 0) do { putc('\t', stdout); } while (--t); if (b > 0) do { putc(' ', stdout); } while (--b); while (*cp) putwc(*cp++, stdout); putc('\n', stdout); } /* * Initialize the output line with the appropriate number of * leading blanks. */ static void leadin(void) { int b; wchar_t *cp; int l; switch (crown_state) { case c_head: l = crown_head; crown_state = c_lead; break; case c_lead: case c_fixup: l = crown_head; crown_state = c_fixup; break; case c_body: l = crown_body; break; default: l = pfx; break; } filler = l; for (b = 0, cp = outbuf; b < l; b++) *cp++ = L' '; outp = cp; } /* * Is s1 a prefix of s2?? */ static int ispref(wchar_t *s1, wchar_t *s2) { while (*s1 != L'\0' && *s2 != L'\0') if (*s1++ != *s2++) return (0); return (1); } /* * Set an input option */ static int setopt(char *cp) { static int ws = 0; if (*cp == '-') { if (cp[1] == 'c' && cp[2] == '\0') { crown_state = c_reset; return (1); } if (cp[1] == 's' && cp[2] == '\0') { nojoin = 1; return (1); } if (cp[1] == 'w' && cp[2] == '\0') { ws++; return (1); } width = atoi(cp+1); } else if (ws) { width = atoi(cp); ws = 0; } else return (0); if (width <= 0 || width >= BUFSIZ-2) { fprintf(stderr, "fmt: bad width: %d\n", width); exit(1); } return (1); } #define LIB_WDRESOLVE "/usr/lib/locale/%s/LC_CTYPE/wdresolve.so" #define WCHKIND "_wdchkind_" static int _wckind_c_locale(wchar_t); static int (*__wckind)(wchar_t) = _wckind_c_locale; static void *dlhandle = NULL; static void _wckind_init(void) { char *locale; char path[MAXPATHLEN + 1]; if (dlhandle != NULL) { (void) dlclose(dlhandle); dlhandle = NULL; } locale = setlocale(LC_CTYPE, NULL); if (strcmp(locale, "C") == 0) goto c_locale; (void) sprintf(path, LIB_WDRESOLVE, locale); if ((dlhandle = dlopen(path, RTLD_LAZY)) != NULL) { __wckind = (int (*)(wchar_t))dlsym(dlhandle, WCHKIND); if (__wckind != NULL) return; (void) dlclose(dlhandle); dlhandle = NULL; } c_locale: __wckind = _wckind_c_locale; } int _wckind(wchar_t wc) { return (*__wckind) (wc); } static int _wckind_c_locale(wchar_t wc) { int ret; /* * DEPEND_ON_ANSIC: L notion for the character is new in * ANSI-C, k&r compiler won't work. */ if (iswascii(wc)) ret = (iswalnum(wc) || wc == L'_') ? 0 : 1; else ret = wcsetno(wc) + 1; return (ret); } /* * header_chk - * Called when done looking for a set mail header lines. * Either a blank line was seen, or EOF was reached. * * Verifies if current hdrbuf of potential mail header lines * is really a mail header. A mail header must be at least 2 * lines and more than half of them must start with one of the * known mail header strings in headnames. * * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid * mail header. Otherwise, it sets hdr_state to flush_hdr. * * h_lines = hdrbuf index for next line to be saved; * also indicates current # of lines in potential header */ static void header_chk(void) { wchar_t *cp; /* ptr to current char of line */ wchar_t **hp; /* ptr to current char of a valid */ /* mail header string */ int l; /* index */ /* * number of lines in hdrbuf that look * like mail header lines (start with * a known mail header prefix) */ int hdrcount = 0; /* header must have at least 2 lines (h_lines > 1) */ if (h_lines < 2) { hdr_state = flush_hdr; return; } /* * go through each line in hdrbuf and see how many * look like mail header lines */ for (l = 0; l < h_lines; l++) { /* skip initial blanks */ for (cp = hdrbuf[l]; *cp == L' '; cp++) { } for (hp = &headnames[0]; *hp != (wchar_t *)0; hp++) if (ispref(*hp, cp)) { hdrcount++; break; } } /* * if over half match, we'll assume this is a header; * set hdr_state to indicate whether to treat * these lines as mail header (do_hdr) or not (flush_hdr) */ if (hdrcount > h_lines / 2) hdr_state = do_hdr; else hdr_state = flush_hdr; } /* * fill_hdrbuf - * Save given input line into next element of hdrbuf, * as a potential mail header line, to be processed later * once we decide whether or not the contents of hdrbuf is * really a mail header, via header_chk(). * * Does not allow hdrbuf to exceed MAXLINES lines. * Dynamically allocates space for each line. If we are unable * to allocate space for the current string, stop special mail * header preservation at this point and continue formatting * without it. */ static void fill_hdrbuf(wchar_t line[]) { wchar_t *cp; /* pointer to characters in input line */ int i; /* index into characters a hdrbuf line */ if (h_lines >= MAXLINES) { /* * if we run over MAXLINES potential mail header * lines, stop checking--this is most likely NOT a * mail header; flush out the hdrbuf, then process * the current 'line' normally. */ hdr_state = flush_hdr; process_hdrbuf(); prefix(line); return; } hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) * (wslen(line) + 1)); if (hdrbuf[h_lines] == NULL) { perror("malloc"); fprintf(stderr, "fmt: unable to do mail header preservation\n"); errs++; /* * Can't process mail header; flush current contents * of mail header and continue with no more mail * header processing */ if (h_lines == 0) /* hdrbuf is empty; process this line normally */ prefix(line); else { hdr_state = flush_hdr; for (i = 0; i < h_lines; i++) { prefix(hdrbuf[i]); free(hdrbuf[i]); } h_lines = 0; } hdr_state = off; return; } /* save this line as a potential mail header line */ for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++) { } h_lines++; } /* * process_hdrbuf - * Outputs the lines currently stored in hdrbuf, according * to the current hdr_state value, assumed to be either do_hdr * or flush_hdr. * This should be called after doing a header_chk() to verify * the hdrbuf and set the hdr_state flag. */ static void process_hdrbuf(void) { int i; for (i = 0; i < h_lines; i++) { prefix(hdrbuf[i]); free(hdrbuf[i]); } hdr_state = not_in_hdr; h_lines = 0; }