/*********************************************************************** * * * This software is part of the ast package * * Copyright (c) 1992-2009 AT&T Intellectual Property * * and is licensed under the * * Common Public License, Version 1.0 * * by AT&T Intellectual Property * * * * A copy of the License is available at * * http://www.opensource.org/licenses/cpl1.0.txt * * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * * * * Information and Software Systems Research * * AT&T Research * * Florham Park NJ * * * * Glenn Fowler * * David Korn * * * ***********************************************************************/ #pragma prototyped static const char usage[] = "[-?\n@(#)$Id: fmt (AT&T Research) 2007-01-02 $\n]" USAGE_LICENSE "[+NAME?fmt - simple text formatter]" "[+DESCRIPTION?\bfmt\b reads the input files and left justifies space " "separated words into lines \awidth\a characters or less in length and " "writes the lines to the standard output. The standard input is read if " "\b-\b or no files are specified. Blank lines and interword spacing are " "preserved in the output. Indentation is preserved, and lines with " "identical indentation are joined and justified.]" "[+?\bfmt\b is meant to format mail messages prior to sending, but may " "also be useful for other simple tasks. For example, in \bvi\b(1) the " "command \b:!}fmt\b will justify the lines in the current paragraph.]" "[c:crown-margin?Preserve the indentation of the first two lines within " "a paragraph, and align the left margin of each subsequent line with " "that of the second line.]" "[o:optget?Format concatenated \boptget\b(3) usage strings.]" "[s:split-only?Split lines only; do not join short lines to form longer " "ones.]" "[u:uniform-spacing?One space between words, two after sentences.]" "[w:width?Set the output line width to \acolumns\a.]#[columns:=72]" "\n\n" "[ file ... ]" "\n\n" "[+SEE ALSO?\bmailx\b(1), \bnroff\b(1), \btroff\b(1), \bvi\b(1), " "\boptget\b(3)]" ; #include #include typedef struct Fmt_s { long flags; char* outp; char* outbuf; char* endbuf; Sfio_t* in; Sfio_t* out; int indent; int nextdent; int nwords; int prefix; int quote; int retain; int section; } Fmt_t; #define INDENT 4 #define TABSZ 8 #define isoption(fp,c) ((fp)->flags&(1L<<((c)-'a'))) #define setoption(fp,c) ((fp)->flags|=(1L<<((c)-'a'))) #define clroption(fp,c) ((fp)->flags&=~(1L<<((c)-'a'))) static void outline(Fmt_t* fp) { register char* cp = fp->outbuf; int n = 0; int c; int d; if (!fp->outp) return; while (fp->outp[-1] == ' ') fp->outp--; *fp->outp = 0; while (*cp++ == ' ') n++; if (n >= TABSZ) { n /= TABSZ; cp = &fp->outbuf[TABSZ*n]; while (n--) *--cp = '\t'; } else cp = fp->outbuf; fp->nwords = 0; if (!isoption(fp, 'o')) sfputr(fp->out, cp, '\n'); else if (*cp) { n = fp->indent; if (*cp != '[') { if (*cp == ' ') cp++; n += INDENT; } while (n--) sfputc(fp->out, ' '); if (fp->quote) { if ((d = (fp->outp - cp)) <= 0) c = 0; else if ((c = fp->outp[-1]) == 'n' && d > 1 && fp->outp[-2] == '\\') c = '}'; sfprintf(fp->out, "\"%s%s\"\n", cp, c == ']' || c == '{' || c == '}' ? "" : " "); } else sfputr(fp->out, cp, '\n'); if (fp->nextdent) { fp->indent += fp->nextdent; fp->endbuf -= fp->nextdent; fp->nextdent = 0; } } fp->outp = 0; } static void split(Fmt_t* fp, char* buf, int splice) { register char* cp; register char* ep; register char* qp; register int c = 1; register int q = 0; register int n; int prefix; for (ep = buf; *ep == ' '; ep++); prefix = ep - buf; /* * preserve blank lines */ if ((*ep == 0 || *buf == '.') && !isoption(fp, 'o')) { if (*ep) prefix = strlen(buf); outline(fp); strcpy(fp->outbuf, buf); fp->outp = fp->outbuf+prefix; outline(fp); return; } if (fp->prefix < prefix && !isoption(fp, 'c')) outline(fp); if (!fp->outp || prefix < fp->prefix) fp->prefix = prefix; while (c) { cp = ep; while (*ep == ' ') ep++; if (cp != ep && isoption(fp, 'u')) cp = ep-1; while (c = *ep) { if (c == ' ') break; ep++; /* * skip over \space */ if (c == '\\' && *ep) ep++; } n = (ep-cp); if (n && isoption(fp, 'o')) { for (qp = cp; qp < ep; qp++) if (*qp == '\\') qp++; else if (*qp == '"') q = !q; if (*(ep-1) == '"') goto skip; } if (fp->nwords > 0 && &fp->outp[n] >= fp->endbuf && !fp->retain && !q) outline(fp); skip: if (fp->nwords == 0) { if (fp->prefix) memset(fp->outbuf, ' ', fp->prefix); fp->outp = &fp->outbuf[fp->prefix]; while (*cp == ' ') cp++; n = (ep-cp); } memcpy(fp->outp, cp, n); fp->outp += n; fp->nwords++; } if (isoption(fp, 's') || *buf == 0) outline(fp); else if (fp->outp) { /* * two spaces at ends of sentences */ if (!isoption(fp, 'o') && strchr(".:!?", fp->outp[-1])) *fp->outp++ = ' '; if (!splice && !fp->retain && (!fp->quote || (fp->outp - fp->outbuf) < 2 || fp->outp[-2] != '\\' || fp->outp[-1] != 'n' && fp->outp[-1] != 't' && fp->outp[-1] != ' ')) *fp->outp++ = ' '; } } static int dofmt(Fmt_t* fp) { register int c; int b; int x; int splice; char* cp; char* dp; char* ep; char* lp; char* tp; char buf[8192]; cp = 0; while (cp || (cp = sfgetr(fp->in, '\n', 0)) && !(splice = 0) && (lp = cp + sfvalue(fp->in) - 1) || (cp = sfgetr(fp->in, '\n', SF_LASTR)) && (splice = 1) && (lp = cp + sfvalue(fp->in))) { if (isoption(fp, 'o')) { if (!isoption(fp, 'i')) { setoption(fp, 'i'); b = 0; while (cp < lp) { if (*cp == ' ') b += 1; else if (*cp == '\t') b += INDENT; else break; cp++; } fp->indent = roundof(b, INDENT); } else while (cp < lp && (*cp == ' ' || *cp == '\t')) cp++; if (!isoption(fp, 'q') && cp < lp) { setoption(fp, 'q'); if (*cp == '"') { ep = lp; while (--ep > cp) if (*ep == '"') { fp->quote = 1; break; } else if (*ep != ' ' && *ep != '\t') break; } } } again: dp = buf; ep = 0; for (b = 1;; b = 0) { if (cp >= lp) { cp = 0; break; } c = *cp++; if (isoption(fp, 'o')) { if (c == '\\') { x = 0; c = ' '; cp--; while (cp < lp) { if (*cp == '\\') { cp++; if ((lp - cp) < 1) { c = '\\'; break; } if (*cp == 'n') { cp++; c = '\n'; if ((lp - cp) > 2) { if (*cp == ']' || *cp == '@' && *(cp + 1) == '(') { *dp++ = '\\'; *dp++ = 'n'; c = *cp++; break; } if (*cp == '\\' && *(cp + 1) == 'n') { cp += 2; *dp++ = '\n'; break; } } } else if (*cp == 't' || *cp == ' ') { cp++; x = 1; c = ' '; } else { if (x && dp != buf && *(dp - 1) != ' ') *dp++ = ' '; *dp++ = '\\'; c = *cp++; break; } } else if (*cp == ' ' || *cp == '\t') { cp++; c = ' '; x = 1; } else { if (x && c != '\n' && dp != buf && *(dp - 1) != ' ') *dp++ = ' '; break; } } if (c == '\n') { c = 0; goto flush; } if (c == ' ' && (dp == buf || *(dp - 1) == ' ')) continue; } else if (c == '"') { if (b || cp >= lp) { if (fp->quote) continue; fp->section = 0; } } else if (c == '\a') { *dp++ = '\\'; c = 'a'; } else if (c == '\b') { *dp++ = '\\'; c = 'b'; } else if (c == '\f') { *dp++ = '\\'; c = 'f'; } else if (c == '\v') { *dp++ = '\\'; c = 'v'; } else if (c == ']' && (cp >= lp || *cp != ':' && *cp != '#' && *cp != '!')) { if (cp < lp && *cp == ']') { cp++; *dp++ = c; } else { fp->section = 1; fp->retain = 0; flush: *dp++ = c; *dp = 0; split(fp, buf, 0); outline(fp); goto again; } } else if (fp->section) { if (c == '[') { if (b) fp->retain = 1; else { cp--; c = 0; goto flush; } fp->section = 0; } else if (c == '{') { x = 1; for (tp = cp; tp < lp; tp++) { if (*tp == '[' || *tp == '\n') break; if (*tp == ' ' || *tp == '\t' || *tp == '"') continue; if (*tp == '\\' && (lp - tp) > 1) { if (*++tp == 'n') break; if (*tp == 't' || *tp == '\n') continue; } x = 0; break; } if (x) { if (fp->endbuf > (fp->outbuf + fp->indent + 2*INDENT)) fp->nextdent = 2*INDENT; goto flush; } else fp->section = 0; } else if (c == '}') { if (fp->indent && (b || *(cp - 2) != 'f')) { if (b) { fp->indent -= 2*INDENT; fp->endbuf += 2*INDENT; } else { cp--; c = 0; } goto flush; } else fp->section = 0; } else if (c == ' ' || c == '\t') continue; else fp->section = 0; } else if (c == '?' && (cp >= lp || *cp != '?')) { if (fp->retain) { cp--; while (cp < lp && *cp != ' ' && *cp != '\t' && *cp != ']' && dp < &buf[sizeof(buf)-3]) *dp++ = *cp++; if (cp < lp && (*cp == ' ' || *cp == '\t')) *dp++ = *cp++; *dp = 0; split(fp, buf, 0); dp = buf; ep = 0; fp->retain = 0; if (fp->outp >= fp->endbuf) outline(fp); continue; } } else if (c == ' ' || c == '\t') for (c = ' '; *cp == ' ' || *cp == '\t'; cp++); } else if (c == '\b') { if (dp > buf) { dp--; if (ep) ep--; } continue; } else if (c == '\t') { /* * expand tabs */ if (!ep) ep = dp; c = isoption(fp, 'o') ? 1 : TABSZ - (dp - buf) % TABSZ; if (dp >= &buf[sizeof(buf) - c - 3]) { cp--; break; } while (c-- > 0) *dp++ = ' '; continue; } else if (!isprint(c)) continue; if (dp >= &buf[sizeof(buf) - 3]) { tp = dp; while (--tp > buf) if (isspace(*tp)) { cp -= dp - tp; dp = tp; break; } ep = 0; break; } if (c != ' ') ep = 0; else if (!ep) ep = dp; *dp++ = c; } if (ep) *ep = 0; else *dp = 0; split(fp, buf, splice); } return 0; } int b_fmt(int argc, char** argv, void *context) { register int n; char* cp; Fmt_t fmt; char outbuf[8 * 1024]; fmt.flags = 0; fmt.out = sfstdout; fmt.outbuf = outbuf; fmt.outp = 0; fmt.endbuf = &outbuf[72]; fmt.indent = 0; fmt.nextdent = 0; fmt.nwords = 0; fmt.prefix = 0; fmt.quote = 0; fmt.retain = 0; fmt.section = 1; cmdinit(argc, argv, context, ERROR_CATALOG, 0); while (n = optget(argv, usage)) switch (n) { case 'c': case 'o': case 's': case 'u': setoption(&fmt, n); break; case 'w': if (opt_info.num < TABSZ || opt_info.num>= sizeof(outbuf)) error(2, "width out of range"); fmt.endbuf = &outbuf[opt_info.num]; break; case ':': error(2, "%s", opt_info.arg); break; case '?': error(ERROR_usage(2), "%s", opt_info.arg); break; } argv += opt_info.index; if (error_info.errors) error(ERROR_usage(2), "%s", optusage(NiL)); if (isoption(&fmt, 'o')) setoption(&fmt, 'c'); if (isoption(&fmt, 's')) clroption(&fmt, 'u'); if (cp = *argv) argv++; do { if (!cp || streq(cp, "-")) fmt.in = sfstdin; else if (!(fmt.in = sfopen(NiL, cp, "r"))) { error(ERROR_system(0), "%s: cannot open", cp); error_info.errors = 1; continue; } dofmt(&fmt); if (fmt.in != sfstdin) sfclose(fmt.in); } while (cp = *argv++); outline(&fmt); if (sfsync(sfstdout)) error(ERROR_system(0), "write error"); return error_info.errors != 0; }