1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 14 Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org> 15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 17 Copyright (c) 2021 Dong-hee Na <donghee.na@python.org> 18 Licensed under the MIT license: 19 20 Permission is hereby granted, free of charge, to any person obtaining 21 a copy of this software and associated documentation files (the 22 "Software"), to deal in the Software without restriction, including 23 without limitation the rights to use, copy, modify, merge, publish, 24 distribute, sublicense, and/or sell copies of the Software, and to permit 25 persons to whom the Software is furnished to do so, subject to the 26 following conditions: 27 28 The above copyright notice and this permission notice shall be included 29 in all copies or substantial portions of the Software. 30 31 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 32 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 33 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 34 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 35 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 36 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 37 USE OR OTHER DEALINGS IN THE SOFTWARE. 38 */ 39 40 #include <expat_config.h> 41 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <stddef.h> 45 #include <string.h> 46 #include <fcntl.h> 47 48 #ifdef _WIN32 49 # include "winconfig.h" 50 #endif 51 52 #include "expat.h" 53 #include "internal.h" /* for UNUSED_P only */ 54 #include "xmlfile.h" 55 #include "xmltchar.h" 56 #include "filemap.h" 57 58 #if defined(_MSC_VER) 59 # include <io.h> 60 #endif 61 62 #ifdef HAVE_UNISTD_H 63 # include <unistd.h> 64 #endif 65 66 #ifndef O_BINARY 67 # ifdef _O_BINARY 68 # define O_BINARY _O_BINARY 69 # else 70 # define O_BINARY 0 71 # endif 72 #endif 73 74 #ifdef _DEBUG 75 # define READ_SIZE 16 76 #else 77 # define READ_SIZE (1024 * 8) 78 #endif 79 80 typedef struct { 81 XML_Parser parser; 82 int *retPtr; 83 } PROCESS_ARGS; 84 85 static int processStream(const XML_Char *filename, XML_Parser parser); 86 87 static void 88 reportError(XML_Parser parser, const XML_Char *filename) { 89 enum XML_Error code = XML_GetErrorCode(parser); 90 const XML_Char *message = XML_ErrorString(code); 91 if (message) 92 ftprintf(stdout, 93 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%") 94 T(XML_FMT_INT_MOD) T("u") T(": %s\n"), 95 filename, XML_GetErrorLineNumber(parser), 96 XML_GetErrorColumnNumber(parser), message); 97 else 98 ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code); 99 } 100 101 /* This implementation will give problems on files larger than INT_MAX. */ 102 static void 103 processFile(const void *data, size_t size, const XML_Char *filename, 104 void *args) { 105 XML_Parser parser = ((PROCESS_ARGS *)args)->parser; 106 int *retPtr = ((PROCESS_ARGS *)args)->retPtr; 107 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) { 108 reportError(parser, filename); 109 *retPtr = 0; 110 } else 111 *retPtr = 1; 112 } 113 114 #if defined(_WIN32) 115 116 static int 117 isAsciiLetter(XML_Char c) { 118 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z')); 119 } 120 121 #endif /* _WIN32 */ 122 123 static const XML_Char * 124 resolveSystemId(const XML_Char *base, const XML_Char *systemId, 125 XML_Char **toFree) { 126 XML_Char *s; 127 *toFree = 0; 128 if (! base || *systemId == T('/') 129 #if defined(_WIN32) 130 || *systemId == T('\\') 131 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':')) 132 #endif 133 ) 134 return systemId; 135 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2) 136 * sizeof(XML_Char)); 137 if (! *toFree) 138 return systemId; 139 tcscpy(*toFree, base); 140 s = *toFree; 141 if (tcsrchr(s, T('/'))) 142 s = tcsrchr(s, T('/')) + 1; 143 #if defined(_WIN32) 144 if (tcsrchr(s, T('\\'))) 145 s = tcsrchr(s, T('\\')) + 1; 146 #endif 147 tcscpy(s, systemId); 148 return *toFree; 149 } 150 151 static int 152 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context, 153 const XML_Char *base, const XML_Char *systemId, 154 const XML_Char *publicId) { 155 int result; 156 XML_Char *s; 157 const XML_Char *filename; 158 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 159 int filemapRes; 160 PROCESS_ARGS args; 161 UNUSED_P(publicId); 162 args.retPtr = &result; 163 args.parser = entParser; 164 filename = resolveSystemId(base, systemId, &s); 165 XML_SetBase(entParser, filename); 166 filemapRes = filemap(filename, processFile, &args); 167 switch (filemapRes) { 168 case 0: 169 result = 0; 170 break; 171 case 2: 172 ftprintf(stderr, 173 T("%s: file too large for memory-mapping") 174 T(", switching to streaming\n"), 175 filename); 176 result = processStream(filename, entParser); 177 break; 178 } 179 free(s); 180 XML_ParserFree(entParser); 181 return result; 182 } 183 184 static int 185 processStream(const XML_Char *filename, XML_Parser parser) { 186 /* passing NULL for filename means read input from stdin */ 187 int fd = 0; /* 0 is the fileno for stdin */ 188 189 if (filename != NULL) { 190 fd = topen(filename, O_BINARY | O_RDONLY); 191 if (fd < 0) { 192 tperror(filename); 193 return 0; 194 } 195 } 196 for (;;) { 197 int nread; 198 char *buf = (char *)XML_GetBuffer(parser, READ_SIZE); 199 if (! buf) { 200 if (filename != NULL) 201 close(fd); 202 ftprintf(stderr, T("%s: out of memory\n"), 203 filename != NULL ? filename : T("xmlwf")); 204 return 0; 205 } 206 nread = read(fd, buf, READ_SIZE); 207 if (nread < 0) { 208 tperror(filename != NULL ? filename : T("STDIN")); 209 if (filename != NULL) 210 close(fd); 211 return 0; 212 } 213 if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) { 214 reportError(parser, filename != NULL ? filename : T("STDIN")); 215 if (filename != NULL) 216 close(fd); 217 return 0; 218 } 219 if (nread == 0) { 220 if (filename != NULL) 221 close(fd); 222 break; 223 ; 224 } 225 } 226 return 1; 227 } 228 229 static int 230 externalEntityRefStream(XML_Parser parser, const XML_Char *context, 231 const XML_Char *base, const XML_Char *systemId, 232 const XML_Char *publicId) { 233 XML_Char *s; 234 const XML_Char *filename; 235 int ret; 236 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 237 UNUSED_P(publicId); 238 filename = resolveSystemId(base, systemId, &s); 239 XML_SetBase(entParser, filename); 240 ret = processStream(filename, entParser); 241 free(s); 242 XML_ParserFree(entParser); 243 return ret; 244 } 245 246 int 247 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) { 248 int result; 249 250 if (! XML_SetBase(parser, filename)) { 251 ftprintf(stderr, T("%s: out of memory"), filename); 252 exit(1); 253 } 254 255 if (flags & XML_EXTERNAL_ENTITIES) 256 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE) 257 ? externalEntityRefFilemap 258 : externalEntityRefStream); 259 if (flags & XML_MAP_FILE) { 260 int filemapRes; 261 PROCESS_ARGS args; 262 args.retPtr = &result; 263 args.parser = parser; 264 filemapRes = filemap(filename, processFile, &args); 265 switch (filemapRes) { 266 case 0: 267 result = 0; 268 break; 269 case 2: 270 ftprintf(stderr, 271 T("%s: file too large for memory-mapping") 272 T(", switching to streaming\n"), 273 filename); 274 result = processStream(filename, parser); 275 break; 276 } 277 } else 278 result = processStream(filename, parser); 279 return result; 280 } 281