15bb6a25fSPoul-Henning Kamp #define CHARSET_MAX 41 25bb6a25fSPoul-Henning Kamp 35bb6a25fSPoul-Henning Kamp static const char * 45bb6a25fSPoul-Henning Kamp getTok(const char **pp) 55bb6a25fSPoul-Henning Kamp { 65bb6a25fSPoul-Henning Kamp enum { inAtom, inString, init, inComment }; 75bb6a25fSPoul-Henning Kamp int state = init; 85bb6a25fSPoul-Henning Kamp const char *tokStart = 0; 95bb6a25fSPoul-Henning Kamp for (;;) { 105bb6a25fSPoul-Henning Kamp switch (**pp) { 115bb6a25fSPoul-Henning Kamp case '\0': 125bb6a25fSPoul-Henning Kamp return 0; 135bb6a25fSPoul-Henning Kamp case ' ': 145bb6a25fSPoul-Henning Kamp case '\r': 155bb6a25fSPoul-Henning Kamp case '\t': 165bb6a25fSPoul-Henning Kamp case '\n': 175bb6a25fSPoul-Henning Kamp if (state == inAtom) 185bb6a25fSPoul-Henning Kamp return tokStart; 195bb6a25fSPoul-Henning Kamp break; 205bb6a25fSPoul-Henning Kamp case '(': 215bb6a25fSPoul-Henning Kamp if (state == inAtom) 225bb6a25fSPoul-Henning Kamp return tokStart; 235bb6a25fSPoul-Henning Kamp if (state != inString) 245bb6a25fSPoul-Henning Kamp state++; 255bb6a25fSPoul-Henning Kamp break; 265bb6a25fSPoul-Henning Kamp case ')': 275bb6a25fSPoul-Henning Kamp if (state > init) 285bb6a25fSPoul-Henning Kamp --state; 295bb6a25fSPoul-Henning Kamp else if (state != inString) 305bb6a25fSPoul-Henning Kamp return 0; 315bb6a25fSPoul-Henning Kamp break; 325bb6a25fSPoul-Henning Kamp case ';': 335bb6a25fSPoul-Henning Kamp case '/': 345bb6a25fSPoul-Henning Kamp case '=': 355bb6a25fSPoul-Henning Kamp if (state == inAtom) 365bb6a25fSPoul-Henning Kamp return tokStart; 375bb6a25fSPoul-Henning Kamp if (state == init) 385bb6a25fSPoul-Henning Kamp return (*pp)++; 395bb6a25fSPoul-Henning Kamp break; 405bb6a25fSPoul-Henning Kamp case '\\': 415bb6a25fSPoul-Henning Kamp ++*pp; 425bb6a25fSPoul-Henning Kamp if (**pp == '\0') 435bb6a25fSPoul-Henning Kamp return 0; 445bb6a25fSPoul-Henning Kamp break; 455bb6a25fSPoul-Henning Kamp case '"': 465bb6a25fSPoul-Henning Kamp switch (state) { 475bb6a25fSPoul-Henning Kamp case inString: 485bb6a25fSPoul-Henning Kamp ++*pp; 495bb6a25fSPoul-Henning Kamp return tokStart; 505bb6a25fSPoul-Henning Kamp case inAtom: 515bb6a25fSPoul-Henning Kamp return tokStart; 525bb6a25fSPoul-Henning Kamp case init: 535bb6a25fSPoul-Henning Kamp tokStart = *pp; 545bb6a25fSPoul-Henning Kamp state = inString; 555bb6a25fSPoul-Henning Kamp break; 565bb6a25fSPoul-Henning Kamp } 575bb6a25fSPoul-Henning Kamp break; 585bb6a25fSPoul-Henning Kamp default: 595bb6a25fSPoul-Henning Kamp if (state == init) { 605bb6a25fSPoul-Henning Kamp tokStart = *pp; 615bb6a25fSPoul-Henning Kamp state = inAtom; 625bb6a25fSPoul-Henning Kamp } 635bb6a25fSPoul-Henning Kamp break; 645bb6a25fSPoul-Henning Kamp } 655bb6a25fSPoul-Henning Kamp ++*pp; 665bb6a25fSPoul-Henning Kamp } 675bb6a25fSPoul-Henning Kamp /* not reached */ 685bb6a25fSPoul-Henning Kamp } 695bb6a25fSPoul-Henning Kamp 705bb6a25fSPoul-Henning Kamp /* key must be lowercase ASCII */ 715bb6a25fSPoul-Henning Kamp 725bb6a25fSPoul-Henning Kamp static int 735bb6a25fSPoul-Henning Kamp matchkey(const char *start, const char *end, const char *key) 745bb6a25fSPoul-Henning Kamp { 755bb6a25fSPoul-Henning Kamp if (!start) 765bb6a25fSPoul-Henning Kamp return 0; 775bb6a25fSPoul-Henning Kamp for (; start != end; start++, key++) 785bb6a25fSPoul-Henning Kamp if (*start != *key && *start != 'A' + (*key - 'a')) 795bb6a25fSPoul-Henning Kamp return 0; 805bb6a25fSPoul-Henning Kamp return *key == '\0'; 815bb6a25fSPoul-Henning Kamp } 825bb6a25fSPoul-Henning Kamp 835bb6a25fSPoul-Henning Kamp void 845bb6a25fSPoul-Henning Kamp getXMLCharset(const char *buf, char *charset) 855bb6a25fSPoul-Henning Kamp { 865bb6a25fSPoul-Henning Kamp const char *next, *p; 875bb6a25fSPoul-Henning Kamp 885bb6a25fSPoul-Henning Kamp charset[0] = '\0'; 895bb6a25fSPoul-Henning Kamp next = buf; 905bb6a25fSPoul-Henning Kamp p = getTok(&next); 915bb6a25fSPoul-Henning Kamp if (matchkey(p, next, "text")) 925bb6a25fSPoul-Henning Kamp strcpy(charset, "us-ascii"); 935bb6a25fSPoul-Henning Kamp else if (!matchkey(p, next, "application")) 945bb6a25fSPoul-Henning Kamp return; 955bb6a25fSPoul-Henning Kamp p = getTok(&next); 965bb6a25fSPoul-Henning Kamp if (!p || *p != '/') 975bb6a25fSPoul-Henning Kamp return; 985bb6a25fSPoul-Henning Kamp p = getTok(&next); 995bb6a25fSPoul-Henning Kamp if (matchkey(p, next, "xml")) 1005bb6a25fSPoul-Henning Kamp isXml = 1; 1015bb6a25fSPoul-Henning Kamp p = getTok(&next); 1025bb6a25fSPoul-Henning Kamp while (p) { 1035bb6a25fSPoul-Henning Kamp if (*p == ';') { 1045bb6a25fSPoul-Henning Kamp p = getTok(&next); 1055bb6a25fSPoul-Henning Kamp if (matchkey(p, next, "charset")) { 1065bb6a25fSPoul-Henning Kamp p = getTok(&next); 1075bb6a25fSPoul-Henning Kamp if (p && *p == '=') { 1085bb6a25fSPoul-Henning Kamp p = getTok(&next); 1095bb6a25fSPoul-Henning Kamp if (p) { 1105bb6a25fSPoul-Henning Kamp char *s = charset; 1115bb6a25fSPoul-Henning Kamp if (*p == '"') { 1125bb6a25fSPoul-Henning Kamp while (++p != next - 1) { 1135bb6a25fSPoul-Henning Kamp if (*p == '\\') 1145bb6a25fSPoul-Henning Kamp ++p; 1155bb6a25fSPoul-Henning Kamp if (s == charset + CHARSET_MAX - 1) { 1165bb6a25fSPoul-Henning Kamp charset[0] = '\0'; 1175bb6a25fSPoul-Henning Kamp break; 1185bb6a25fSPoul-Henning Kamp } 1195bb6a25fSPoul-Henning Kamp *s++ = *p; 1205bb6a25fSPoul-Henning Kamp } 1215bb6a25fSPoul-Henning Kamp *s++ = '\0'; 1225bb6a25fSPoul-Henning Kamp } 1235bb6a25fSPoul-Henning Kamp else { 1245bb6a25fSPoul-Henning Kamp if (next - p > CHARSET_MAX - 1) 1255bb6a25fSPoul-Henning Kamp break; 1265bb6a25fSPoul-Henning Kamp while (p != next) 1275bb6a25fSPoul-Henning Kamp *s++ = *p++; 1285bb6a25fSPoul-Henning Kamp *s = 0; 1295bb6a25fSPoul-Henning Kamp break; 1305bb6a25fSPoul-Henning Kamp } 1315bb6a25fSPoul-Henning Kamp } 1325bb6a25fSPoul-Henning Kamp } 1335bb6a25fSPoul-Henning Kamp } 1345bb6a25fSPoul-Henning Kamp } 1355bb6a25fSPoul-Henning Kamp else 1365bb6a25fSPoul-Henning Kamp p = getTok(&next); 1375bb6a25fSPoul-Henning Kamp } 1385bb6a25fSPoul-Henning Kamp } 1395bb6a25fSPoul-Henning Kamp 1405bb6a25fSPoul-Henning Kamp int 1415bb6a25fSPoul-Henning Kamp main(int argc, char **argv) 1425bb6a25fSPoul-Henning Kamp { 1435bb6a25fSPoul-Henning Kamp char buf[CHARSET_MAX]; 1445bb6a25fSPoul-Henning Kamp getXMLCharset(argv[1], buf); 1455bb6a25fSPoul-Henning Kamp printf("charset = \"%s\"\n", buf); 1465bb6a25fSPoul-Henning Kamp return 0; 1475bb6a25fSPoul-Henning Kamp } 148