/*********************************************************************** * * * This software is part of the ast package * * Copyright (c) 2000-2008 AT&T Intellectual Property * * and is licensed under the * * Common Public License, Version 1.0 * * by AT&T Intellectual Property * * * * A copy of the License is available at * * http://www.opensource.org/licenses/cpl1.0.txt * * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * * * * Information and Software Systems Research * * AT&T Research * * Florham Park NJ * * * * Glenn Fowler * * * ***********************************************************************/ #pragma prototyped /* * Glenn Fowler * AT&T Research */ static const char usage[] = "[-?\n@(#)$Id: msgcvt (AT&T Research) 2000-05-01 $\n]" USAGE_LICENSE "[+NAME?msgcvt - convert message file to/from html]" "[+DESCRIPTION?\bmsgcvt\b reads a \bgencat\b(1) format file on the standard" " input and converts it to \bhtml\b on the standard output. The input" " file must contain the control statement \b$quote \"\b and use the \"" " character to quote message text. The output is in a form suitable for" " automatic translation by web sites like" " \bhttp://babelfish.altavista.com/\b or filters like" " \btranslate\b(1).]" "[h:html?Generate \bhtml\b from \bgencat\b(1) input. This is the default.]" "[m:msg?Generate a \bgencat\b(1) message file from (presumably translated)" " \bhtml\b. Wide characters are UTF-8 encoded.]" "[r:raw?The message file is raw message text, one message per line, with no" " quoting or line numbering.]" "[+SEE ALSO?\bgencat\b(1), \bmsgcc\b(1), \bmsggen\b(1), \btranslate\b(1)]" ; #include #include #include #define MSG_RAW (1<<0) #define MSG_SPLICE (1<<1) #define SPACE(s) (isspace(*s)&&(s+=1)||*s=='\\'&&(*(s+1)=='n'||*(s+1)=='t')&&(s+=2)) typedef void (*Convert_f)(Sfio_t*, Sfio_t*, int); typedef struct { const char* name; int code; } Code_t; static const Code_t codes[] = { "aacute", 225, "Aacute", 193, "acirc", 226, "Acirc", 194, "aelig", 230, "AElig", 198, "agrave", 224, "Agrave", 192, "amp", '&', "aring", 229, "Aring", 197, "atilde", 227, "Atilde", 195, "auml", 228, "Auml", 196, "ccedil", 231, "Ccedil", 199, "copy", 169, "eacute", 233, "Eacute", 201, "ecirc", 234, "Ecirc", 202, "egrave", 232, "Egrave", 200, "euml", 235, "Euml", 203, "gt", '>', "iacute", 237, "Iacute", 205, "icirc", 238, "Icirc", 206, "igrave", 236, "Igrave", 204, "iuml", 239, "Iuml", 207, "lt", '<', "nbsp", ' ', "ntilde", 241, "Ntilde", 209, "oacute", 243, "Oacute", 211, "ocirc", 244, "Ocirc", 212, "ograve", 242, "Ograve", 210, "oslash", 248, "Oslash", 216, "otilde", 245, "Otilde", 213, "ouml", 246, "Ouml", 214, "quot", '"', "reg", 174, "szlig", 223, "uacute", 250, "Uacute", 218, "ucirc", 251, "Ucirc", 219, "ugrave", 249, "Ugrave", 217, "uuml", 252, "Uuml", 220, "yuml", 255, }; static int decode(Sfio_t* ip) { register int c; register int i; char name[32]; if ((c = sfgetc(ip)) == EOF) return '&'; name[0] = c; i = 1; if (c != '#' && !isalpha(c)) goto bad; while ((c = sfgetc(ip)) != EOF && c != ';') { if (c == '&') i = 0; else { name[i++] = c; if (!isalnum(c) && (i > 1 || c != '#') || i >= (elementsof(name) - 1)) goto bad; } } name[i] = 0; if (name[0] == '#') { switch (c = strtol(name + 1, NiL, 10)) { case 91: c = '['; break; case 93: c = ']'; break; } } else { for (i = 0; i < elementsof(codes); i++) if (streq(codes[i].name, name)) { c = codes[i].code; break; } if (i >= elementsof(codes)) goto bad; } return c; bad: name[i] = 0; if (c == ';') error(1, "&%s: unknown HTML special character -- & assumed", name); else error(1, "&%s: invalid HTML special character -- & assumed", name); while (i--) sfungetc(ip, name[i]); return '&'; } static int sfpututf(Sfio_t* op, register int w) { if (!(w & ~0x7F)) return sfputc(op, w); else if (!(w & ~0x7FF)) sfputc(op, 0xC0 + (w >> 6)); else if (!(w & ~0xFFFF)) { sfputc(op, 0xE0 + (w >> 12)); sfputc(op, 0x80 + (w >> 6 ) & 0x3F); } else return sfputc(op, '?'); return sfputc(op, 0x80 + (w & 0x3F)); } static int sfnext(Sfio_t* ip) { register int c; while (isspace(c = sfgetc(ip))); return c; } static void html2msg(register Sfio_t* ip, register Sfio_t* op, int flags) { register int c; register int q; again: while ((c = sfgetc(ip)) != EOF) if (c == '<') { if ((c = sfnext(ip)) == 'O' && (c = sfnext(ip)) == 'L' && isspace(c = sfgetc(ip)) && (c = sfnext(ip)) == 'S' && (c = sfnext(ip)) == 'T' && (c = sfnext(ip)) == 'A' && (c = sfnext(ip)) == 'R' && (c = sfnext(ip)) == 'T' && (c = sfnext(ip)) == '=' && (c = sfnext(ip)) == '"' && (c = sfnext(ip)) == '5' && (c = sfnext(ip)) == '5' && (c = sfnext(ip)) == '0' && (c = sfnext(ip)) == '7' && (c = sfnext(ip)) == '1' && (c = sfnext(ip)) == '7' && (c = sfnext(ip)) == '"' && (c = sfnext(ip)) == '>') break; while (c != EOF && c != '>') c = sfgetc(ip); } if ((c = sfnext(ip)) != EOF) sfungetc(ip, c); q = 0; for (;;) { switch (c = sfgetc(ip)) { case EOF: break; case '&': c = decode(ip); sfpututf(op, c); if (isspace(c)) { while (isspace(c = sfgetc(ip))); if (c == EOF) break; sfungetc(ip, c); } continue; case '<': switch (c = sfnext(ip)) { case '/': if ((c = sfnext(ip)) == 'O' && (c = sfgetc(ip)) == 'L' && (c = sfnext(ip)) == '>') { if (q) { sfputc(op, q); q = '"'; } goto again; } break; case 'B': if ((c = sfgetc(ip)) == 'R' && (c = sfnext(ip)) == '>') sfputc(op, ' '); break; case 'L': if ((c = sfgetc(ip)) == 'I' && (c = sfnext(ip)) == '>' && isdigit(c = sfnext(ip))) { if (q) sfputc(op, q); else q = '"'; sfputc(op, '\n'); do { sfputc(op, c); } while (isdigit(c = sfgetc(ip))); if (c == EOF) break; sfputc(op, ' '); sfputc(op, '"'); if (isspace(c)) c = sfnext(ip); if (c == '<' && (c = sfnext(ip)) == 'L' && (c = sfgetc(ip)) == 'I' && (c = sfnext(ip)) == '>') /* great */; continue; } break; case 'P': if ((c = sfnext(ip)) == '>') sfputc(op, '\n'); else if (c == 'C' && (c = sfgetc(ip)) == 'L' && (c = sfgetc(ip)) == 'A' && (c = sfgetc(ip)) == 'S' && (c = sfgetc(ip)) == 'S' && (c = sfnext(ip)) == '=' && (c = sfnext(ip)) == '"') for (;;) { switch (c = sfgetc(ip)) { case EOF: case '"': break; case '&': c = decode(ip); sfpututf(op, c); continue; default: sfpututf(op, c); continue; } break; } break; } while (c != EOF && c != '>') c = sfgetc(ip); if (c == EOF || (c = sfgetc(ip)) == EOF) break; sfungetc(ip, c); continue; case '"': if (!flags) sfputc(op, '\\'); sfputc(op, c); continue; case '\n': if (flags) { sfputc(op, c); continue; } /*FALLTHROUGH*/ case ' ': case '\t': while ((c = sfgetc(ip)) != EOF) if (c == '&') { c = decode(ip); if (!isspace(c)) sfputc(op, ' '); sfpututf(op, c); break; } else if (!isspace(c)) { if (c == '<') { c = sfgetc(ip); if (c == EOF) break; sfungetc(ip, c); sfungetc(ip, '<'); if (c != 'L' && c != '/') sfputc(op, ' '); } else { if (c != EOF) sfungetc(ip, c); sfputc(op, ' '); } break; } continue; case '\r': case '[': case ']': continue; default: sfpututf(op, c); continue; } break; } if (q) sfputc(op, q); sfputc(op, '\n'); } static void encode(Sfio_t* op, register int c) { if (c == '<') sfprintf(op, "<"); else if (c == '>') sfprintf(op, ">"); else if (c == '"') sfprintf(op, """); else if (c == '&') sfprintf(op, "&"); else if (c == '[') sfprintf(op, "["); else if (c == ']') sfprintf(op, "]"); else sfputc(op, c); } static void msg2html(register Sfio_t* ip, register Sfio_t* op, register int flags) { register char* s; register int c; register int q; register int p; sfprintf(op, "\n"); sfprintf(op, "
    \n"); p = q = 0; while (s = sfgetr(ip, '\n', 1)) { error_info.line++; if (flags) sfprintf(op, "

    "); else { if (*s == '$') { if (p) sfprintf(op, "

    "); else p = 1; sfprintf(op, "

    \n"); continue; } p = 0; if (!isdigit(*s)) continue; sfprintf(op, "

  1. "); while (isdigit(c = *s++)) sfputc(op, c); sfprintf(op, "
  2. "); while (c && c != '"') c = *s++; if (!c) s--; else if (isspace(*s)) { s++; sfprintf(op, "
    "); } } for (;;) { switch (c = *s++) { case 0: flags &= ~MSG_SPLICE; if (q) { q = 0; sfprintf(op, "\">"); } sfputc(op, '\n'); break; case '<': sfprintf(op, "<"); continue; case '>': sfprintf(op, ">"); continue; case '&': sfprintf(op, "&"); continue; case '[': sfprintf(op, "["); continue; case ']': sfprintf(op, "]"); continue; case '$': if (!q) { q = 1; sfprintf(op, "

    "); } else sfprintf(op, "
    "); continue; } c = ' '; /*FALLTHROUGH*/ default: if (q) { q = 0; sfprintf(op, "\">"); } sfputc(op, c); continue; } break; } } sfprintf(op, "

\n"); sfprintf(op, "\n"); error_info.line = 0; } int main(int argc, char** argv) { int flags = 0; Convert_f convert = msg2html; NoP(argc); error_info.id = "msgcvt"; for (;;) { switch (optget(argv, usage)) { case 'h': convert = msg2html; continue; case 'm': convert = html2msg; continue; case 'r': flags |= MSG_RAW; continue; case '?': error(ERROR_USAGE|4, "%s", opt_info.arg); continue; case ':': error(2, "%s", opt_info.arg); continue; } break; } argv += opt_info.index; if (error_info.errors) error(ERROR_USAGE|4, "%s", optusage(NiL)); (*convert)(sfstdin, sfstdout, flags); return error_info.errors != 0; }