1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
17 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
18 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
19 Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com>
20 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez@gmail.com>
21 Licensed under the MIT license:
22
23 Permission is hereby granted, free of charge, to any person obtaining
24 a copy of this software and associated documentation files (the
25 "Software"), to deal in the Software without restriction, including
26 without limitation the rights to use, copy, modify, merge, publish,
27 distribute, sublicense, and/or sell copies of the Software, and to permit
28 persons to whom the Software is furnished to do so, subject to the
29 following conditions:
30
31 The above copyright notice and this permission notice shall be included
32 in all copies or substantial portions of the Software.
33
34 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
37 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
38 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
39 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
40 USE OR OTHER DEALINGS IN THE SOFTWARE.
41 */
42
43 #include "expat_config.h"
44
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <limits.h>
#include <fcntl.h>
50
51 #ifdef _WIN32
52 # include "winconfig.h"
53 #endif
54
55 #include "expat.h"
56 #include "internal.h" /* for UNUSED_P only */
57 #include "xmlfile.h"
58 #include "xmltchar.h"
59 #include "filemap.h"
60
/* Function "read": select the low-level read primitive of the platform,
   together with the type it returns (EXPAT_read_count_t) and the type of
   its byte-count argument (EXPAT_read_req_t). */
#if ! defined(_MSC_VER) /* POSIX */
#  include <unistd.h>
/* https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
#  define EXPAT_read read
#  define EXPAT_read_count_t ssize_t
#  define EXPAT_read_req_t size_t
#else /* Microsoft C runtime */
#  include <io.h>
/* https://msdn.microsoft.com/en-us/library/wyssk1bs(v=vs.100).aspx */
#  define EXPAT_read _read
#  define EXPAT_read_count_t int
#  define EXPAT_read_req_t unsigned int
#endif
75
/* Make O_BINARY usable everywhere: reuse _O_BINARY where only that spelling
   exists, otherwise fall back to 0 so OR-ing it into open flags is a no-op. */
#if ! defined(O_BINARY) && defined(_O_BINARY)
#  define O_BINARY _O_BINARY
#elif ! defined(O_BINARY)
#  define O_BINARY 0
#endif
83
/* Number of bytes requested from the parser buffer and from read() on each
   iteration of the streaming loop in processStream(); 8 KiB by default. */
int g_read_size_bytes = 8 * 1024;
85
86 typedef struct {
87 XML_Parser parser;
88 int *retPtr;
89 } PROCESS_ARGS;
90
91 static int processStream(const XML_Char *filename, XML_Parser parser);
92
93 static void
reportError(XML_Parser parser,const XML_Char * filename)94 reportError(XML_Parser parser, const XML_Char *filename) {
95 enum XML_Error code = XML_GetErrorCode(parser);
96 const XML_Char *message = XML_ErrorString(code);
97 if (message)
98 ftprintf(stdout,
99 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
100 T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
101 filename, XML_GetCurrentLineNumber(parser),
102 XML_GetCurrentColumnNumber(parser), message);
103 else
104 ftprintf(stderr, T("%s: (unknown message %u)\n"), filename,
105 (unsigned int)code);
106 }
107
108 /* This implementation will give problems on files larger than INT_MAX. */
109 static void
processFile(const void * data,size_t size,const XML_Char * filename,void * args)110 processFile(const void *data, size_t size, const XML_Char *filename,
111 void *args) {
112 XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
113 int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
114 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
115 reportError(parser, filename);
116 *retPtr = 0;
117 } else
118 *retPtr = 1;
119 }
120
#if defined(_WIN32)

/* Returns nonzero when `c` is one of the ASCII letters a-z or A-Z.
   Used by resolveSystemId() to recognise a drive prefix such as "C:". */
static int
isAsciiLetter(XML_Char c) {
  if (c >= T('a') && c <= T('z'))
    return 1;
  return c >= T('A') && c <= T('Z');
}

#endif /* _WIN32 */
129
130 static const XML_Char *
resolveSystemId(const XML_Char * base,const XML_Char * systemId,XML_Char ** toFree)131 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
132 XML_Char **toFree) {
133 XML_Char *s;
134 *toFree = 0;
135 if (! base || *systemId == T('/')
136 #if defined(_WIN32)
137 || *systemId == T('\\')
138 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
139 #endif
140 )
141 return systemId;
142 *toFree = malloc((tcslen(base) + tcslen(systemId) + 2) * sizeof(XML_Char));
143 if (! *toFree)
144 return systemId;
145 tcscpy(*toFree, base);
146 s = *toFree;
147 if (tcsrchr(s, T('/')))
148 s = tcsrchr(s, T('/')) + 1;
149 #if defined(_WIN32)
150 if (tcsrchr(s, T('\\')))
151 s = tcsrchr(s, T('\\')) + 1;
152 #endif
153 tcscpy(s, systemId);
154 return *toFree;
155 }
156
157 static int
externalEntityRefFilemap(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)158 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
159 const XML_Char *base, const XML_Char *systemId,
160 const XML_Char *publicId) {
161 int result;
162 XML_Char *s;
163 const XML_Char *filename;
164 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
165 int filemapRes;
166 PROCESS_ARGS args;
167 UNUSED_P(publicId);
168 args.retPtr = &result;
169 args.parser = entParser;
170 filename = resolveSystemId(base, systemId, &s);
171 XML_SetBase(entParser, filename);
172 filemapRes = filemap(filename, processFile, &args);
173 switch (filemapRes) {
174 case 0:
175 result = 0;
176 break;
177 case 2:
178 ftprintf(stderr,
179 T("%s: file too large for memory-mapping")
180 T(", switching to streaming\n"),
181 filename);
182 result = processStream(filename, entParser);
183 break;
184 }
185 free(s);
186 XML_ParserFree(entParser);
187 return result;
188 }
189
190 static int
processStream(const XML_Char * filename,XML_Parser parser)191 processStream(const XML_Char *filename, XML_Parser parser) {
192 /* passing NULL for filename means read input from stdin */
193 int fd = 0; /* 0 is the fileno for stdin */
194
195 if (filename != NULL) {
196 fd = topen(filename, O_BINARY | O_RDONLY);
197 if (fd < 0) {
198 tperror(filename);
199 return 0;
200 }
201 }
202 for (;;) {
203 EXPAT_read_count_t nread;
204 char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes);
205 if (! buf) {
206 if (filename != NULL)
207 close(fd);
208 ftprintf(stderr, T("%s: out of memory\n"),
209 filename != NULL ? filename : T("xmlwf"));
210 return 0;
211 }
212 nread = EXPAT_read(fd, buf, (EXPAT_read_req_t)g_read_size_bytes);
213 if (nread < 0) {
214 tperror(filename != NULL ? filename : T("STDIN"));
215 if (filename != NULL)
216 close(fd);
217 return 0;
218 }
219 if (XML_ParseBuffer(parser, (int)nread, nread == 0) == XML_STATUS_ERROR) {
220 reportError(parser, filename != NULL ? filename : T("STDIN"));
221 if (filename != NULL)
222 close(fd);
223 return 0;
224 }
225 if (nread == 0) {
226 if (filename != NULL)
227 close(fd);
228 break;
229 }
230 }
231 return 1;
232 }
233
234 static int
externalEntityRefStream(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)235 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
236 const XML_Char *base, const XML_Char *systemId,
237 const XML_Char *publicId) {
238 XML_Char *s;
239 const XML_Char *filename;
240 int ret;
241 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
242 UNUSED_P(publicId);
243 filename = resolveSystemId(base, systemId, &s);
244 XML_SetBase(entParser, filename);
245 ret = processStream(filename, entParser);
246 free(s);
247 XML_ParserFree(entParser);
248 return ret;
249 }
250
251 int
XML_ProcessFile(XML_Parser parser,const XML_Char * filename,unsigned flags)252 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
253 int result;
254
255 if (! XML_SetBase(parser, filename)) {
256 ftprintf(stderr, T("%s: out of memory"), filename);
257 exit(1);
258 }
259
260 if (flags & XML_EXTERNAL_ENTITIES)
261 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
262 ? externalEntityRefFilemap
263 : externalEntityRefStream);
264 if (flags & XML_MAP_FILE) {
265 int filemapRes;
266 PROCESS_ARGS args;
267 args.retPtr = &result;
268 args.parser = parser;
269 filemapRes = filemap(filename, processFile, &args);
270 switch (filemapRes) {
271 case 0:
272 result = 0;
273 break;
274 case 2:
275 ftprintf(stderr,
276 T("%s: file too large for memory-mapping")
277 T(", switching to streaming\n"),
278 filename);
279 result = processStream(filename, parser);
280 break;
281 }
282 } else
283 result = processStream(filename, parser);
284 return result;
285 }
286