1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 14 Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org> 15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 17 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 18 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 19 Licensed under the MIT license: 20 21 Permission is hereby granted, free of charge, to any person obtaining 22 a copy of this software and associated documentation files (the 23 "Software"), to deal in the Software without restriction, including 24 without limitation the rights to use, copy, modify, merge, publish, 25 distribute, sublicense, and/or sell copies of the Software, and to permit 26 persons to whom the Software is furnished to do so, subject to the 27 following conditions: 28 29 The above copyright notice and this permission notice shall be included 30 in all copies or substantial portions of the Software. 31 32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 33 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 34 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 35 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 36 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 37 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 38 USE OR OTHER DEALINGS IN THE SOFTWARE. 39 */ 40 41 #include "expat_config.h" 42 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <stddef.h> 46 #include <string.h> 47 #include <fcntl.h> 48 49 #ifdef _WIN32 50 # include "winconfig.h" 51 #endif 52 53 #include "expat.h" 54 #include "internal.h" /* for UNUSED_P only */ 55 #include "xmlfile.h" 56 #include "xmltchar.h" 57 #include "filemap.h" 58 59 #if defined(_MSC_VER) 60 # include <io.h> 61 #endif 62 63 #ifdef HAVE_UNISTD_H 64 # include <unistd.h> 65 #endif 66 67 #ifndef O_BINARY 68 # ifdef _O_BINARY 69 # define O_BINARY _O_BINARY 70 # else 71 # define O_BINARY 0 72 # endif 73 #endif 74 75 int g_read_size_bytes = 1024 * 8; 76 77 typedef struct { 78 XML_Parser parser; 79 int *retPtr; 80 } PROCESS_ARGS; 81 82 static int processStream(const XML_Char *filename, XML_Parser parser); 83 84 static void 85 reportError(XML_Parser parser, const XML_Char *filename) { 86 enum XML_Error code = XML_GetErrorCode(parser); 87 const XML_Char *message = XML_ErrorString(code); 88 if (message) 89 ftprintf(stdout, 90 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%") 91 T(XML_FMT_INT_MOD) T("u") T(": %s\n"), 92 filename, XML_GetErrorLineNumber(parser), 93 XML_GetErrorColumnNumber(parser), message); 94 else 95 ftprintf(stderr, T("%s: (unknown message %u)\n"), filename, 96 (unsigned int)code); 97 } 98 99 /* This implementation will give problems on files larger than INT_MAX. */ 100 static void 101 processFile(const void *data, size_t size, const XML_Char *filename, 102 void *args) { 103 XML_Parser parser = ((PROCESS_ARGS *)args)->parser; 104 int *retPtr = ((PROCESS_ARGS *)args)->retPtr; 105 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) { 106 reportError(parser, filename); 107 *retPtr = 0; 108 } else 109 *retPtr = 1; 110 } 111 112 #if defined(_WIN32) 113 114 static int 115 isAsciiLetter(XML_Char c) { 116 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z')); 117 } 118 119 #endif /* _WIN32 */ 120 121 static const XML_Char * 122 resolveSystemId(const XML_Char *base, const XML_Char *systemId, 123 XML_Char **toFree) { 124 XML_Char *s; 125 *toFree = 0; 126 if (! base || *systemId == T('/') 127 #if defined(_WIN32) 128 || *systemId == T('\\') 129 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':')) 130 #endif 131 ) 132 return systemId; 133 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2) 134 * sizeof(XML_Char)); 135 if (! *toFree) 136 return systemId; 137 tcscpy(*toFree, base); 138 s = *toFree; 139 if (tcsrchr(s, T('/'))) 140 s = tcsrchr(s, T('/')) + 1; 141 #if defined(_WIN32) 142 if (tcsrchr(s, T('\\'))) 143 s = tcsrchr(s, T('\\')) + 1; 144 #endif 145 tcscpy(s, systemId); 146 return *toFree; 147 } 148 149 static int 150 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context, 151 const XML_Char *base, const XML_Char *systemId, 152 const XML_Char *publicId) { 153 int result; 154 XML_Char *s; 155 const XML_Char *filename; 156 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 157 int filemapRes; 158 PROCESS_ARGS args; 159 UNUSED_P(publicId); 160 args.retPtr = &result; 161 args.parser = entParser; 162 filename = resolveSystemId(base, systemId, &s); 163 XML_SetBase(entParser, filename); 164 filemapRes = filemap(filename, processFile, &args); 165 switch (filemapRes) { 166 case 0: 167 result = 0; 168 break; 169 case 2: 170 ftprintf(stderr, 171 T("%s: file too large for memory-mapping") 172 T(", switching to streaming\n"), 173 filename); 174 result = processStream(filename, entParser); 175 break; 176 } 177 free(s); 178 XML_ParserFree(entParser); 179 return result; 180 } 181 182 static int 183 processStream(const XML_Char *filename, XML_Parser parser) { 184 /* passing NULL for filename means read input from stdin */ 185 int fd = 0; /* 0 is the fileno for stdin */ 186 187 if (filename != NULL) { 188 fd = topen(filename, O_BINARY | O_RDONLY); 189 if (fd < 0) { 190 tperror(filename); 191 return 0; 192 } 193 } 194 for (;;) { 195 int nread; 196 char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes); 197 if (! buf) { 198 if (filename != NULL) 199 close(fd); 200 ftprintf(stderr, T("%s: out of memory\n"), 201 filename != NULL ? filename : T("xmlwf")); 202 return 0; 203 } 204 nread = read(fd, buf, g_read_size_bytes); 205 if (nread < 0) { 206 tperror(filename != NULL ? filename : T("STDIN")); 207 if (filename != NULL) 208 close(fd); 209 return 0; 210 } 211 if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) { 212 reportError(parser, filename != NULL ? filename : T("STDIN")); 213 if (filename != NULL) 214 close(fd); 215 return 0; 216 } 217 if (nread == 0) { 218 if (filename != NULL) 219 close(fd); 220 break; 221 ; 222 } 223 } 224 return 1; 225 } 226 227 static int 228 externalEntityRefStream(XML_Parser parser, const XML_Char *context, 229 const XML_Char *base, const XML_Char *systemId, 230 const XML_Char *publicId) { 231 XML_Char *s; 232 const XML_Char *filename; 233 int ret; 234 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 235 UNUSED_P(publicId); 236 filename = resolveSystemId(base, systemId, &s); 237 XML_SetBase(entParser, filename); 238 ret = processStream(filename, entParser); 239 free(s); 240 XML_ParserFree(entParser); 241 return ret; 242 } 243 244 int 245 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) { 246 int result; 247 248 if (! XML_SetBase(parser, filename)) { 249 ftprintf(stderr, T("%s: out of memory"), filename); 250 exit(1); 251 } 252 253 if (flags & XML_EXTERNAL_ENTITIES) 254 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE) 255 ? externalEntityRefFilemap 256 : externalEntityRefStream); 257 if (flags & XML_MAP_FILE) { 258 int filemapRes; 259 PROCESS_ARGS args; 260 args.retPtr = &result; 261 args.parser = parser; 262 filemapRes = filemap(filename, processFile, &args); 263 switch (filemapRes) { 264 case 0: 265 result = 0; 266 break; 267 case 2: 268 ftprintf(stderr, 269 T("%s: file too large for memory-mapping") 270 T(", switching to streaming\n"), 271 filename); 272 result = processStream(filename, parser); 273 break; 274 } 275 } else 276 result = processStream(filename, parser); 277 return result; 278 } 279