xref: /freebsd/contrib/expat/xmlwf/xmlfile.c (revision 8d485a8490fe1cd60e7b6a00d3c8a8cc116a56fb)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14    Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
18    Copyright (c) 2024      Hanno Böck <hanno@gentoo.org>
19    Licensed under the MIT license:
20 
21    Permission is  hereby granted,  free of charge,  to any  person obtaining
22    a  copy  of  this  software   and  associated  documentation  files  (the
23    "Software"),  to  deal in  the  Software  without restriction,  including
24    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
25    distribute, sublicense, and/or sell copies of the Software, and to permit
26    persons  to whom  the Software  is  furnished to  do so,  subject to  the
27    following conditions:
28 
29    The above copyright  notice and this permission notice  shall be included
30    in all copies or substantial portions of the Software.
31 
32    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
33    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
34    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
35    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
36    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
37    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
38    USE OR OTHER DEALINGS IN THE SOFTWARE.
39 */
40 
41 #include "expat_config.h"
42 
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stddef.h>
46 #include <string.h>
47 #include <fcntl.h>
48 
49 #ifdef _WIN32
50 #  include "winconfig.h"
51 #endif
52 
53 #include "expat.h"
54 #include "internal.h" /* for UNUSED_P only */
55 #include "xmlfile.h"
56 #include "xmltchar.h"
57 #include "filemap.h"
58 
/* Function "read": select the platform's low-level read call, together with
   the type of its return value (EXPAT_read_count_t) and the type of its
   byte-count argument (EXPAT_read_req_t), so that processStream can call it
   without platform-specific code. */
#if defined(_MSC_VER)
#  include <io.h>
/* https://msdn.microsoft.com/en-us/library/wyssk1bs(v=vs.100).aspx */
#  define EXPAT_read _read
#  define EXPAT_read_count_t int
#  define EXPAT_read_req_t unsigned int
#else /* POSIX */
#  include <unistd.h>
/* https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
#  define EXPAT_read read
#  define EXPAT_read_count_t ssize_t
#  define EXPAT_read_req_t size_t
#endif
73 
/* Make sure O_BINARY is always defined: reuse _O_BINARY where only that
   spelling exists, otherwise fall back to 0 so that OR-ing it into the
   open flags has no effect. */
#ifndef O_BINARY
#  ifdef _O_BINARY
#    define O_BINARY _O_BINARY
#  else
#    define O_BINARY 0
#  endif
#endif
81 
/* Number of bytes processStream requests from XML_GetBuffer and from each
   read call.  NOTE(review): non-static, so presumably adjusted from another
   translation unit -- confirm against the callers. */
int g_read_size_bytes = 1024 * 8;
83 
/* Context handed to processFile through filemap's opaque argument pointer. */
typedef struct {
  XML_Parser parser; /* parser that receives the file contents */
  int *retPtr;       /* where processFile stores 1 (parsed) or 0 (error) */
} PROCESS_ARGS;
88 
/* Forward declaration: externalEntityRefFilemap calls processStream before
   its definition further down in this file. */
static int processStream(const XML_Char *filename, XML_Parser parser);
90 
91 static void
92 reportError(XML_Parser parser, const XML_Char *filename) {
93   enum XML_Error code = XML_GetErrorCode(parser);
94   const XML_Char *message = XML_ErrorString(code);
95   if (message)
96     ftprintf(stdout,
97              T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
98                  T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
99              filename, XML_GetCurrentLineNumber(parser),
100              XML_GetCurrentColumnNumber(parser), message);
101   else
102     ftprintf(stderr, T("%s: (unknown message %u)\n"), filename,
103              (unsigned int)code);
104 }
105 
106 /* This implementation will give problems on files larger than INT_MAX. */
107 static void
108 processFile(const void *data, size_t size, const XML_Char *filename,
109             void *args) {
110   XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
111   int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
112   if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
113     reportError(parser, filename);
114     *retPtr = 0;
115   } else
116     *retPtr = 1;
117 }
118 
119 #if defined(_WIN32)
120 
121 static int
122 isAsciiLetter(XML_Char c) {
123   return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
124 }
125 
126 #endif /* _WIN32 */
127 
128 static const XML_Char *
129 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
130                 XML_Char **toFree) {
131   XML_Char *s;
132   *toFree = 0;
133   if (! base || *systemId == T('/')
134 #if defined(_WIN32)
135       || *systemId == T('\\')
136       || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
137 #endif
138   )
139     return systemId;
140   *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
141                                * sizeof(XML_Char));
142   if (! *toFree)
143     return systemId;
144   tcscpy(*toFree, base);
145   s = *toFree;
146   if (tcsrchr(s, T('/')))
147     s = tcsrchr(s, T('/')) + 1;
148 #if defined(_WIN32)
149   if (tcsrchr(s, T('\\')))
150     s = tcsrchr(s, T('\\')) + 1;
151 #endif
152   tcscpy(s, systemId);
153   return *toFree;
154 }
155 
156 static int
157 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
158                          const XML_Char *base, const XML_Char *systemId,
159                          const XML_Char *publicId) {
160   int result;
161   XML_Char *s;
162   const XML_Char *filename;
163   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
164   int filemapRes;
165   PROCESS_ARGS args;
166   UNUSED_P(publicId);
167   args.retPtr = &result;
168   args.parser = entParser;
169   filename = resolveSystemId(base, systemId, &s);
170   XML_SetBase(entParser, filename);
171   filemapRes = filemap(filename, processFile, &args);
172   switch (filemapRes) {
173   case 0:
174     result = 0;
175     break;
176   case 2:
177     ftprintf(stderr,
178              T("%s: file too large for memory-mapping")
179                  T(", switching to streaming\n"),
180              filename);
181     result = processStream(filename, entParser);
182     break;
183   }
184   free(s);
185   XML_ParserFree(entParser);
186   return result;
187 }
188 
189 static int
190 processStream(const XML_Char *filename, XML_Parser parser) {
191   /* passing NULL for filename means read input from stdin */
192   int fd = 0; /* 0 is the fileno for stdin */
193 
194   if (filename != NULL) {
195     fd = topen(filename, O_BINARY | O_RDONLY);
196     if (fd < 0) {
197       tperror(filename);
198       return 0;
199     }
200   }
201   for (;;) {
202     EXPAT_read_count_t nread;
203     char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes);
204     if (! buf) {
205       if (filename != NULL)
206         close(fd);
207       ftprintf(stderr, T("%s: out of memory\n"),
208                filename != NULL ? filename : T("xmlwf"));
209       return 0;
210     }
211     nread = EXPAT_read(fd, buf, (EXPAT_read_req_t)g_read_size_bytes);
212     if (nread < 0) {
213       tperror(filename != NULL ? filename : T("STDIN"));
214       if (filename != NULL)
215         close(fd);
216       return 0;
217     }
218     if (XML_ParseBuffer(parser, (int)nread, nread == 0) == XML_STATUS_ERROR) {
219       reportError(parser, filename != NULL ? filename : T("STDIN"));
220       if (filename != NULL)
221         close(fd);
222       return 0;
223     }
224     if (nread == 0) {
225       if (filename != NULL)
226         close(fd);
227       break;
228       ;
229     }
230   }
231   return 1;
232 }
233 
234 static int
235 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
236                         const XML_Char *base, const XML_Char *systemId,
237                         const XML_Char *publicId) {
238   XML_Char *s;
239   const XML_Char *filename;
240   int ret;
241   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
242   UNUSED_P(publicId);
243   filename = resolveSystemId(base, systemId, &s);
244   XML_SetBase(entParser, filename);
245   ret = processStream(filename, entParser);
246   free(s);
247   XML_ParserFree(entParser);
248   return ret;
249 }
250 
251 int
252 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
253   int result;
254 
255   if (! XML_SetBase(parser, filename)) {
256     ftprintf(stderr, T("%s: out of memory"), filename);
257     exit(1);
258   }
259 
260   if (flags & XML_EXTERNAL_ENTITIES)
261     XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
262                                                 ? externalEntityRefFilemap
263                                                 : externalEntityRefStream);
264   if (flags & XML_MAP_FILE) {
265     int filemapRes;
266     PROCESS_ARGS args;
267     args.retPtr = &result;
268     args.parser = parser;
269     filemapRes = filemap(filename, processFile, &args);
270     switch (filemapRes) {
271     case 0:
272       result = 0;
273       break;
274     case 2:
275       ftprintf(stderr,
276                T("%s: file too large for memory-mapping")
277                    T(", switching to streaming\n"),
278                filename);
279       result = processStream(filename, parser);
280       break;
281     }
282   } else
283     result = processStream(filename, parser);
284   return result;
285 }
286