1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 14 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> 15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 17 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 18 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 19 Licensed under the MIT license: 20 21 Permission is hereby granted, free of charge, to any person obtaining 22 a copy of this software and associated documentation files (the 23 "Software"), to deal in the Software without restriction, including 24 without limitation the rights to use, copy, modify, merge, publish, 25 distribute, sublicense, and/or sell copies of the Software, and to permit 26 persons to whom the Software is furnished to do so, subject to the 27 following conditions: 28 29 The above copyright notice and this permission notice shall be included 30 in all copies or substantial portions of the Software. 31 32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 33 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 34 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 35 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 36 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 37 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 38 USE OR OTHER DEALINGS IN THE SOFTWARE. 39 */ 40 41 #include "expat_config.h" 42 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <stddef.h> 46 #include <string.h> 47 #include <fcntl.h> 48 49 #ifdef _WIN32 50 # include "winconfig.h" 51 #endif 52 53 #include "expat.h" 54 #include "internal.h" /* for UNUSED_P only */ 55 #include "xmlfile.h" 56 #include "xmltchar.h" 57 #include "filemap.h" 58 59 /* Function "read": */ 60 #if defined(_MSC_VER) 61 # include <io.h> 62 /* https://msdn.microsoft.com/en-us/library/wyssk1bs(v=vs.100).aspx */ 63 # define EXPAT_read _read 64 # define EXPAT_read_count_t int 65 # define EXPAT_read_req_t unsigned int 66 #else /* POSIX */ 67 # include <unistd.h> 68 /* https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */ 69 # define EXPAT_read read 70 # define EXPAT_read_count_t ssize_t 71 # define EXPAT_read_req_t size_t 72 #endif 73 74 #ifndef O_BINARY 75 # ifdef _O_BINARY 76 # define O_BINARY _O_BINARY 77 # else 78 # define O_BINARY 0 79 # endif 80 #endif 81 82 int g_read_size_bytes = 1024 * 8; 83 84 typedef struct { 85 XML_Parser parser; 86 int *retPtr; 87 } PROCESS_ARGS; 88 89 static int processStream(const XML_Char *filename, XML_Parser parser); 90 91 static void 92 reportError(XML_Parser parser, const XML_Char *filename) { 93 enum XML_Error code = XML_GetErrorCode(parser); 94 const XML_Char *message = XML_ErrorString(code); 95 if (message) 96 ftprintf(stdout, 97 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%") 98 T(XML_FMT_INT_MOD) T("u") T(": %s\n"), 99 filename, XML_GetCurrentLineNumber(parser), 100 XML_GetCurrentColumnNumber(parser), message); 101 else 102 ftprintf(stderr, T("%s: (unknown message %u)\n"), filename, 103 (unsigned int)code); 104 } 105 106 /* This implementation will give problems on files larger than INT_MAX. */ 107 static void 108 processFile(const void *data, size_t size, const XML_Char *filename, 109 void *args) { 110 XML_Parser parser = ((PROCESS_ARGS *)args)->parser; 111 int *retPtr = ((PROCESS_ARGS *)args)->retPtr; 112 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) { 113 reportError(parser, filename); 114 *retPtr = 0; 115 } else 116 *retPtr = 1; 117 } 118 119 #if defined(_WIN32) 120 121 static int 122 isAsciiLetter(XML_Char c) { 123 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z')); 124 } 125 126 #endif /* _WIN32 */ 127 128 static const XML_Char * 129 resolveSystemId(const XML_Char *base, const XML_Char *systemId, 130 XML_Char **toFree) { 131 XML_Char *s; 132 *toFree = 0; 133 if (! base || *systemId == T('/') 134 #if defined(_WIN32) 135 || *systemId == T('\\') 136 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':')) 137 #endif 138 ) 139 return systemId; 140 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2) 141 * sizeof(XML_Char)); 142 if (! *toFree) 143 return systemId; 144 tcscpy(*toFree, base); 145 s = *toFree; 146 if (tcsrchr(s, T('/'))) 147 s = tcsrchr(s, T('/')) + 1; 148 #if defined(_WIN32) 149 if (tcsrchr(s, T('\\'))) 150 s = tcsrchr(s, T('\\')) + 1; 151 #endif 152 tcscpy(s, systemId); 153 return *toFree; 154 } 155 156 static int 157 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context, 158 const XML_Char *base, const XML_Char *systemId, 159 const XML_Char *publicId) { 160 int result; 161 XML_Char *s; 162 const XML_Char *filename; 163 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 164 int filemapRes; 165 PROCESS_ARGS args; 166 UNUSED_P(publicId); 167 args.retPtr = &result; 168 args.parser = entParser; 169 filename = resolveSystemId(base, systemId, &s); 170 XML_SetBase(entParser, filename); 171 filemapRes = filemap(filename, processFile, &args); 172 switch (filemapRes) { 173 case 0: 174 result = 0; 175 break; 176 case 2: 177 ftprintf(stderr, 178 T("%s: file too large for memory-mapping") 179 T(", switching to streaming\n"), 180 filename); 181 result = processStream(filename, entParser); 182 break; 183 } 184 free(s); 185 XML_ParserFree(entParser); 186 return result; 187 } 188 189 static int 190 processStream(const XML_Char *filename, XML_Parser parser) { 191 /* passing NULL for filename means read input from stdin */ 192 int fd = 0; /* 0 is the fileno for stdin */ 193 194 if (filename != NULL) { 195 fd = topen(filename, O_BINARY | O_RDONLY); 196 if (fd < 0) { 197 tperror(filename); 198 return 0; 199 } 200 } 201 for (;;) { 202 EXPAT_read_count_t nread; 203 char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes); 204 if (! buf) { 205 if (filename != NULL) 206 close(fd); 207 ftprintf(stderr, T("%s: out of memory\n"), 208 filename != NULL ? filename : T("xmlwf")); 209 return 0; 210 } 211 nread = EXPAT_read(fd, buf, (EXPAT_read_req_t)g_read_size_bytes); 212 if (nread < 0) { 213 tperror(filename != NULL ? filename : T("STDIN")); 214 if (filename != NULL) 215 close(fd); 216 return 0; 217 } 218 if (XML_ParseBuffer(parser, (int)nread, nread == 0) == XML_STATUS_ERROR) { 219 reportError(parser, filename != NULL ? filename : T("STDIN")); 220 if (filename != NULL) 221 close(fd); 222 return 0; 223 } 224 if (nread == 0) { 225 if (filename != NULL) 226 close(fd); 227 break; 228 ; 229 } 230 } 231 return 1; 232 } 233 234 static int 235 externalEntityRefStream(XML_Parser parser, const XML_Char *context, 236 const XML_Char *base, const XML_Char *systemId, 237 const XML_Char *publicId) { 238 XML_Char *s; 239 const XML_Char *filename; 240 int ret; 241 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 242 UNUSED_P(publicId); 243 filename = resolveSystemId(base, systemId, &s); 244 XML_SetBase(entParser, filename); 245 ret = processStream(filename, entParser); 246 free(s); 247 XML_ParserFree(entParser); 248 return ret; 249 } 250 251 int 252 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) { 253 int result; 254 255 if (! XML_SetBase(parser, filename)) { 256 ftprintf(stderr, T("%s: out of memory"), filename); 257 exit(1); 258 } 259 260 if (flags & XML_EXTERNAL_ENTITIES) 261 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE) 262 ? externalEntityRefFilemap 263 : externalEntityRefStream); 264 if (flags & XML_MAP_FILE) { 265 int filemapRes; 266 PROCESS_ARGS args; 267 args.retPtr = &result; 268 args.parser = parser; 269 filemapRes = filemap(filename, processFile, &args); 270 switch (filemapRes) { 271 case 0: 272 result = 0; 273 break; 274 case 2: 275 ftprintf(stderr, 276 T("%s: file too large for memory-mapping") 277 T(", switching to streaming\n"), 278 filename); 279 result = processStream(filename, parser); 280 break; 281 } 282 } else 283 result = processStream(filename, parser); 284 return result; 285 } 286