1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 11 Copyright (c) 2016-2018 Sebastian Pipping <sebastian@pipping.org> 12 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 13 Licensed under the MIT license: 14 15 Permission is hereby granted, free of charge, to any person obtaining 16 a copy of this software and associated documentation files (the 17 "Software"), to deal in the Software without restriction, including 18 without limitation the rights to use, copy, modify, merge, publish, 19 distribute, sublicense, and/or sell copies of the Software, and to permit 20 persons to whom the Software is furnished to do so, subject to the 21 following conditions: 22 23 The above copyright notice and this permission notice shall be included 24 in all copies or substantial portions of the Software. 25 26 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 29 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 30 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 31 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 32 USE OR OTHER DEALINGS IN THE SOFTWARE. 33 */ 34 35 #include <string.h> 36 #include "xmlmime.h" 37 38 static const char * 39 getTok(const char **pp) { 40 /* inComment means one level of nesting; inComment+1 means two levels etc */ 41 enum { inAtom, inString, init, inComment }; 42 int state = init; 43 const char *tokStart = 0; 44 for (;;) { 45 switch (**pp) { 46 case '\0': 47 if (state == inAtom) 48 return tokStart; 49 return 0; 50 case ' ': 51 case '\r': 52 case '\t': 53 case '\n': 54 if (state == inAtom) 55 return tokStart; 56 break; 57 case '(': 58 if (state == inAtom) 59 return tokStart; 60 if (state != inString) 61 state++; 62 break; 63 case ')': 64 if (state > init) 65 --state; 66 else if (state != inString) 67 return 0; 68 break; 69 case ';': 70 case '/': 71 case '=': 72 if (state == inAtom) 73 return tokStart; 74 if (state == init) 75 return (*pp)++; 76 break; 77 case '\\': 78 ++*pp; 79 if (**pp == '\0') 80 return 0; 81 break; 82 case '"': 83 switch (state) { 84 case inString: 85 ++*pp; 86 return tokStart; 87 case inAtom: 88 return tokStart; 89 case init: 90 tokStart = *pp; 91 state = inString; 92 break; 93 } 94 break; 95 default: 96 if (state == init) { 97 tokStart = *pp; 98 state = inAtom; 99 } 100 break; 101 } 102 ++*pp; 103 } 104 /* not reached */ 105 } 106 107 /* key must be lowercase ASCII */ 108 109 static int 110 matchkey(const char *start, const char *end, const char *key) { 111 if (! start) 112 return 0; 113 for (; start != end; start++, key++) 114 if (*start != *key && *start != 'A' + (*key - 'a')) 115 return 0; 116 return *key == '\0'; 117 } 118 119 void 120 getXMLCharset(const char *buf, char *charset) { 121 const char *next, *p; 122 123 charset[0] = '\0'; 124 next = buf; 125 p = getTok(&next); 126 if (matchkey(p, next, "text")) 127 strcpy(charset, "us-ascii"); 128 else if (! matchkey(p, next, "application")) 129 return; 130 p = getTok(&next); 131 if (! p || *p != '/') 132 return; 133 p = getTok(&next); 134 /* BEGIN disabled code */ 135 if (0) { 136 if (! matchkey(p, next, "xml") && charset[0] == '\0') 137 return; 138 } 139 /* END disabled code */ 140 p = getTok(&next); 141 while (p) { 142 if (*p == ';') { 143 p = getTok(&next); 144 if (matchkey(p, next, "charset")) { 145 p = getTok(&next); 146 if (p && *p == '=') { 147 p = getTok(&next); 148 if (p) { 149 char *s = charset; 150 if (*p == '"') { 151 while (++p != next - 1) { 152 if (*p == '\\') 153 ++p; 154 if (s == charset + CHARSET_MAX - 1) { 155 charset[0] = '\0'; 156 break; 157 } 158 *s++ = *p; 159 } 160 *s++ = '\0'; 161 } else { 162 if (next - p > CHARSET_MAX - 1) 163 break; 164 while (p != next) 165 *s++ = *p++; 166 *s = 0; 167 break; 168 } 169 } 170 } 171 break; 172 } 173 } else 174 p = getTok(&next); 175 } 176 } 177 178 #ifdef TEST 179 180 # include <stdio.h> 181 182 int 183 main(int argc, char *argv[]) { 184 char buf[CHARSET_MAX]; 185 if (argc <= 1) 186 return 1; 187 printf("%s\n", argv[1]); 188 getXMLCharset(argv[1], buf); 189 printf("charset=\"%s\"\n", buf); 190 return 0; 191 } 192 193 #endif /* TEST */ 194