#include <stdio.h>
#include <string.h>

/* Size in bytes, including the terminating NUL, of the buffer that callers
   must hand to getXMLCharset(). */
#define CHARSET_MAX 41

/*
 * Scan the next token of a Content-Type style header value.
 *
 * *pp is the current read position and is advanced as input is consumed.
 * Tokens are:
 *   - one of the single characters ';', '/' and '=';
 *   - a double-quoted string (the returned span includes both quotes);
 *   - an atom: a run of other characters ended by whitespace, '(', '"',
 *     one of the single-character tokens, or the end of the input.
 * Whitespace between tokens and parenthesised comments (which may nest)
 * are skipped.  A backslash makes the scanner step over the character
 * that follows it.
 *
 * Returns a pointer to the first character of the token, with *pp left one
 * past its last character.  Returns NULL when no token can be produced: end
 * of input outside an atom, an unmatched ')', or a backslash immediately
 * before the terminating NUL.
 */
static const char *
getTok(const char **pp)
{
    /* inComment is one level of nesting; inComment + 1 is two levels, etc. */
    enum { inAtom, inString, init, inComment };
    int state = init;
    const char *tokStart = NULL;

    for (;;) {
        switch (**pp) {
        case '\0':
            /* An atom that runs up to the end of the input is a complete
               token; an unterminated string or comment is not. */
            if (state == inAtom)
                return tokStart;
            return NULL;
        case ' ':
        case '\r':
        case '\t':
        case '\n':
            if (state == inAtom)
                return tokStart;
            break;
        case '(':
            if (state == inAtom)
                return tokStart;
            if (state != inString)
                state++; /* open a comment, or nest one level deeper */
            break;
        case ')':
            if (state > init)
                --state; /* close one comment level */
            else if (state != inString)
                return NULL; /* ')' without a matching '(' */
            break;
        case ';':
        case '/':
        case '=':
            if (state == inAtom)
                return tokStart;
            if (state == init)
                return (*pp)++; /* single-character token */
            break;
        case '\\':
            ++*pp;
            if (**pp == '\0')
                return NULL;
            break;
        case '"':
            switch (state) {
            case inString:
                ++*pp; /* step past the closing quote */
                return tokStart;
            case inAtom:
                return tokStart;
            case init:
                tokStart = *pp;
                state = inString;
                break;
            default:
                /* inside a comment: quotes carry no meaning */
                break;
            }
            break;
        default:
            if (state == init) {
                tokStart = *pp;
                state = inAtom;
            }
            break;
        }
        ++*pp;
    }
    /* not reached */
}

/*
 * Compare the token [start, end) with key, ignoring the case of ASCII
 * letters in the token.  key must be lowercase ASCII.
 *
 * Returns 1 when the token has exactly the length of key and matches it,
 * 0 otherwise (including when start is NULL).
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
    if (start == NULL)
        return 0;
    for (; start != end; start++, key++) {
        /* Token longer than key.  Checked explicitly so that key is never
           advanced past its terminator. */
        if (*key == '\0')
            return 0;
        if (*start == *key)
            continue;
        if (*key >= 'a' && *key <= 'z' && *start == *key - 'a' + 'A')
            continue;
        return 0;
    }
    return *key == '\0';
}

/*
 * Copy the contents of the quoted-string token [open, end) into charset,
 * dropping the surrounding quotes and the backslash of each escaped
 * character.  charset is set to the empty string when the contents do not
 * fit in CHARSET_MAX - 1 characters.
 */
static void
copyQuotedValue(const char *open, const char *end, char *charset)
{
    const char *p = open;
    char *s = charset;

    /* end - 1 is the closing quote; getTok never ends a string on an
       escaped quote, so stepping over a backslash cannot skip past it. */
    while (++p != end - 1) {
        if (*p == '\\')
            ++p;
        if (s == charset + CHARSET_MAX - 1) {
            charset[0] = '\0';
            return;
        }
        *s++ = *p;
    }
    *s = '\0';
}

/*
 * Extract the character set from a Content-Type value such as
 * "text/xml; charset=utf-8".
 *
 * buf      NUL-terminated header value; NULL is treated as empty.
 * charset  output buffer of at least CHARSET_MAX bytes.
 *
 * On return charset holds:
 *   - "" when the major type is neither "text" nor "application", or when
 *     no '/' follows the major type;
 *   - otherwise the value of the charset parameter when one is found and
 *     fits in the buffer;
 *   - otherwise "us-ascii" for "text" types and "" for "application" types.
 * A quoted value that is too long yields ""; an unquoted value that is too
 * long is ignored and ends the scan.  Scanning also ends after the first
 * unquoted value; after a quoted value it continues, so a later charset
 * parameter replaces it.
 */
void
getXMLCharset(const char *buf, char *charset)
{
    const char *next, *p;

    charset[0] = '\0';
    if (buf == NULL)
        return;

    next = buf;
    p = getTok(&next);
    if (matchkey(p, next, "text"))
        strcpy(charset, "us-ascii");
    else if (!matchkey(p, next, "application"))
        return;

    p = getTok(&next);
    if (p == NULL || *p != '/')
        return;

    /* Consume the subtype; every subtype is accepted. */
    (void)getTok(&next);

    p = getTok(&next);
    while (p != NULL) {
        if (*p != ';') {
            p = getTok(&next);
            continue;
        }
        /* A parameter follows the ';'.  Whenever one of the checks below
           fails, the token just read is re-examined by the loop so that a
           ';' is never swallowed. */
        p = getTok(&next);
        if (!matchkey(p, next, "charset"))
            continue;
        p = getTok(&next);
        if (p == NULL || *p != '=')
            continue;
        p = getTok(&next);
        if (p == NULL)
            break;

        if (*p == '"') {
            copyQuotedValue(p, next, charset);
            /* Resume from the next real token rather than from a position
               inside the quoted string. */
            p = getTok(&next);
        } else {
            size_t len = (size_t)(next - p);

            if (len <= CHARSET_MAX - 1) {
                memcpy(charset, p, len);
                charset[len] = '\0';
            }
            break;
        }
    }
}

/* Command-line driver: prints the charset found in argv[1].  Define
   XMLMIME_NO_MAIN to leave it out when linking this file into a program
   that has its own main(). */
#ifndef XMLMIME_NO_MAIN
int
main(int argc, char **argv)
{
    char buf[CHARSET_MAX];

    if (argc < 2) {
        fprintf(stderr, "usage: %s content-type\n",
                argc > 0 ? argv[0] : "xmlmime");
        return 1;
    }
    getXMLCharset(argv[1], buf);
    printf("charset = \"%s\"\n", buf);
    return 0;
}
#endif /* XMLMIME_NO_MAIN */