1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000-2017 Expat development team 11 Licensed under the MIT license: 12 13 Permission is hereby granted, free of charge, to any person obtaining 14 a copy of this software and associated documentation files (the 15 "Software"), to deal in the Software without restriction, including 16 without limitation the rights to use, copy, modify, merge, publish, 17 distribute, sublicense, and/or sell copies of the Software, and to permit 18 persons to whom the Software is furnished to do so, subject to the 19 following conditions: 20 21 The above copyright notice and this permission notice shall be included 22 in all copies or substantial portions of the Software. 23 24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 30 USE OR OTHER DEALINGS IN THE SOFTWARE. 31 */ 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <stddef.h> 36 #include <string.h> 37 #include <fcntl.h> 38 39 #ifdef _WIN32 40 # include "winconfig.h" 41 #elif defined(HAVE_EXPAT_CONFIG_H) 42 # include <expat_config.h> 43 #endif /* ndef _WIN32 */ 44 45 #include "expat.h" 46 #include "internal.h" /* for UNUSED_P only */ 47 #include "xmlfile.h" 48 #include "xmltchar.h" 49 #include "filemap.h" 50 51 #if defined(_MSC_VER) 52 # include <io.h> 53 #endif 54 55 #ifdef HAVE_UNISTD_H 56 # include <unistd.h> 57 #endif 58 59 #ifndef O_BINARY 60 # ifdef _O_BINARY 61 # define O_BINARY _O_BINARY 62 # else 63 # define O_BINARY 0 64 # endif 65 #endif 66 67 #ifdef _DEBUG 68 # define READ_SIZE 16 69 #else 70 # define READ_SIZE (1024 * 8) 71 #endif 72 73 typedef struct { 74 XML_Parser parser; 75 int *retPtr; 76 } PROCESS_ARGS; 77 78 static int processStream(const XML_Char *filename, XML_Parser parser); 79 80 static void 81 reportError(XML_Parser parser, const XML_Char *filename) { 82 enum XML_Error code = XML_GetErrorCode(parser); 83 const XML_Char *message = XML_ErrorString(code); 84 if (message) 85 ftprintf(stdout, 86 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%") 87 T(XML_FMT_INT_MOD) T("u") T(": %s\n"), 88 filename, XML_GetErrorLineNumber(parser), 89 XML_GetErrorColumnNumber(parser), message); 90 else 91 ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code); 92 } 93 94 /* This implementation will give problems on files larger than INT_MAX. */ 95 static void 96 processFile(const void *data, size_t size, const XML_Char *filename, 97 void *args) { 98 XML_Parser parser = ((PROCESS_ARGS *)args)->parser; 99 int *retPtr = ((PROCESS_ARGS *)args)->retPtr; 100 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) { 101 reportError(parser, filename); 102 *retPtr = 0; 103 } else 104 *retPtr = 1; 105 } 106 107 #if defined(_WIN32) 108 109 static int 110 isAsciiLetter(XML_Char c) { 111 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z')); 112 } 113 114 #endif /* _WIN32 */ 115 116 static const XML_Char * 117 resolveSystemId(const XML_Char *base, const XML_Char *systemId, 118 XML_Char **toFree) { 119 XML_Char *s; 120 *toFree = 0; 121 if (! base || *systemId == T('/') 122 #if defined(_WIN32) 123 || *systemId == T('\\') 124 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':')) 125 #endif 126 ) 127 return systemId; 128 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2) 129 * sizeof(XML_Char)); 130 if (! *toFree) 131 return systemId; 132 tcscpy(*toFree, base); 133 s = *toFree; 134 if (tcsrchr(s, T('/'))) 135 s = tcsrchr(s, T('/')) + 1; 136 #if defined(_WIN32) 137 if (tcsrchr(s, T('\\'))) 138 s = tcsrchr(s, T('\\')) + 1; 139 #endif 140 tcscpy(s, systemId); 141 return *toFree; 142 } 143 144 static int 145 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context, 146 const XML_Char *base, const XML_Char *systemId, 147 const XML_Char *publicId) { 148 int result; 149 XML_Char *s; 150 const XML_Char *filename; 151 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 152 int filemapRes; 153 PROCESS_ARGS args; 154 UNUSED_P(publicId); 155 args.retPtr = &result; 156 args.parser = entParser; 157 filename = resolveSystemId(base, systemId, &s); 158 XML_SetBase(entParser, filename); 159 filemapRes = filemap(filename, processFile, &args); 160 switch (filemapRes) { 161 case 0: 162 result = 0; 163 break; 164 case 2: 165 ftprintf(stderr, 166 T("%s: file too large for memory-mapping") 167 T(", switching to streaming\n"), 168 filename); 169 result = processStream(filename, entParser); 170 break; 171 } 172 free(s); 173 XML_ParserFree(entParser); 174 return result; 175 } 176 177 static int 178 processStream(const XML_Char *filename, XML_Parser parser) { 179 /* passing NULL for filename means read intput from stdin */ 180 int fd = 0; /* 0 is the fileno for stdin */ 181 182 if (filename != NULL) { 183 fd = topen(filename, O_BINARY | O_RDONLY); 184 if (fd < 0) { 185 tperror(filename); 186 return 0; 187 } 188 } 189 for (;;) { 190 int nread; 191 char *buf = (char *)XML_GetBuffer(parser, READ_SIZE); 192 if (! buf) { 193 if (filename != NULL) 194 close(fd); 195 ftprintf(stderr, T("%s: out of memory\n"), 196 filename != NULL ? filename : T("xmlwf")); 197 return 0; 198 } 199 nread = read(fd, buf, READ_SIZE); 200 if (nread < 0) { 201 tperror(filename != NULL ? filename : T("STDIN")); 202 if (filename != NULL) 203 close(fd); 204 return 0; 205 } 206 if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) { 207 reportError(parser, filename != NULL ? filename : T("STDIN")); 208 if (filename != NULL) 209 close(fd); 210 return 0; 211 } 212 if (nread == 0) { 213 if (filename != NULL) 214 close(fd); 215 break; 216 ; 217 } 218 } 219 return 1; 220 } 221 222 static int 223 externalEntityRefStream(XML_Parser parser, const XML_Char *context, 224 const XML_Char *base, const XML_Char *systemId, 225 const XML_Char *publicId) { 226 XML_Char *s; 227 const XML_Char *filename; 228 int ret; 229 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 230 UNUSED_P(publicId); 231 filename = resolveSystemId(base, systemId, &s); 232 XML_SetBase(entParser, filename); 233 ret = processStream(filename, entParser); 234 free(s); 235 XML_ParserFree(entParser); 236 return ret; 237 } 238 239 int 240 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) { 241 int result; 242 243 if (! XML_SetBase(parser, filename)) { 244 ftprintf(stderr, T("%s: out of memory"), filename); 245 exit(1); 246 } 247 248 if (flags & XML_EXTERNAL_ENTITIES) 249 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE) 250 ? externalEntityRefFilemap 251 : externalEntityRefStream); 252 if (flags & XML_MAP_FILE) { 253 int filemapRes; 254 PROCESS_ARGS args; 255 args.retPtr = &result; 256 args.parser = parser; 257 filemapRes = filemap(filename, processFile, &args); 258 switch (filemapRes) { 259 case 0: 260 result = 0; 261 break; 262 case 2: 263 ftprintf(stderr, 264 T("%s: file too large for memory-mapping") 265 T(", switching to streaming\n"), 266 filename); 267 result = processStream(filename, parser); 268 break; 269 } 270 } else 271 result = processStream(filename, parser); 272 return result; 273 } 274