xref: /freebsd/contrib/expat/xmlwf/xmlfile.c (revision f5f40dd63bc7acbb5312b26ac1ea1103c12352a6)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14    Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
18    Licensed under the MIT license:
19 
20    Permission is  hereby granted,  free of charge,  to any  person obtaining
21    a  copy  of  this  software   and  associated  documentation  files  (the
22    "Software"),  to  deal in  the  Software  without restriction,  including
23    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
24    distribute, sublicense, and/or sell copies of the Software, and to permit
25    persons  to whom  the Software  is  furnished to  do so,  subject to  the
26    following conditions:
27 
28    The above copyright  notice and this permission notice  shall be included
29    in all copies or substantial portions of the Software.
30 
31    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
32    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
33    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
36    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37    USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39 
40 #include "expat_config.h"
41 
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stddef.h>
45 #include <string.h>
46 #include <fcntl.h>
47 
48 #ifdef _WIN32
49 #  include "winconfig.h"
50 #endif
51 
52 #include "expat.h"
53 #include "internal.h" /* for UNUSED_P only */
54 #include "xmlfile.h"
55 #include "xmltchar.h"
56 #include "filemap.h"
57 
58 #if defined(_MSC_VER)
59 #  include <io.h>
60 #endif
61 
62 #ifdef HAVE_UNISTD_H
63 #  include <unistd.h>
64 #endif
65 
66 #ifndef O_BINARY
67 #  ifdef _O_BINARY
68 #    define O_BINARY _O_BINARY
69 #  else
70 #    define O_BINARY 0
71 #  endif
72 #endif
73 
/* Number of bytes processStream() requests from the parser's buffer and
   passes to read() on each iteration.  It has external linkage, so it is
   presumably meant to be adjustable from other translation units
   (NOTE(review): confirm against xmlfile.h). */
int g_read_size_bytes = 1024 * 8;

/* Context handed to processFile() through filemap()'s opaque pointer. */
typedef struct {
  XML_Parser parser; /* parser that consumes the mapped file contents */
  int *retPtr;       /* receives 1 if parsing succeeded, 0 if it failed */
} PROCESS_ARGS;

/* Forward declaration: externalEntityRefFilemap() calls processStream()
   before its definition appears in this file. */
static int processStream(const XML_Char *filename, XML_Parser parser);
82 
83 static void
84 reportError(XML_Parser parser, const XML_Char *filename) {
85   enum XML_Error code = XML_GetErrorCode(parser);
86   const XML_Char *message = XML_ErrorString(code);
87   if (message)
88     ftprintf(stdout,
89              T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
90                  T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
91              filename, XML_GetErrorLineNumber(parser),
92              XML_GetErrorColumnNumber(parser), message);
93   else
94     ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
95 }
96 
97 /* This implementation will give problems on files larger than INT_MAX. */
98 static void
99 processFile(const void *data, size_t size, const XML_Char *filename,
100             void *args) {
101   XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
102   int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
103   if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
104     reportError(parser, filename);
105     *retPtr = 0;
106   } else
107     *retPtr = 1;
108 }
109 
110 #if defined(_WIN32)
111 
112 static int
113 isAsciiLetter(XML_Char c) {
114   return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
115 }
116 
117 #endif /* _WIN32 */
118 
119 static const XML_Char *
120 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
121                 XML_Char **toFree) {
122   XML_Char *s;
123   *toFree = 0;
124   if (! base || *systemId == T('/')
125 #if defined(_WIN32)
126       || *systemId == T('\\')
127       || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
128 #endif
129   )
130     return systemId;
131   *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
132                                * sizeof(XML_Char));
133   if (! *toFree)
134     return systemId;
135   tcscpy(*toFree, base);
136   s = *toFree;
137   if (tcsrchr(s, T('/')))
138     s = tcsrchr(s, T('/')) + 1;
139 #if defined(_WIN32)
140   if (tcsrchr(s, T('\\')))
141     s = tcsrchr(s, T('\\')) + 1;
142 #endif
143   tcscpy(s, systemId);
144   return *toFree;
145 }
146 
147 static int
148 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
149                          const XML_Char *base, const XML_Char *systemId,
150                          const XML_Char *publicId) {
151   int result;
152   XML_Char *s;
153   const XML_Char *filename;
154   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
155   int filemapRes;
156   PROCESS_ARGS args;
157   UNUSED_P(publicId);
158   args.retPtr = &result;
159   args.parser = entParser;
160   filename = resolveSystemId(base, systemId, &s);
161   XML_SetBase(entParser, filename);
162   filemapRes = filemap(filename, processFile, &args);
163   switch (filemapRes) {
164   case 0:
165     result = 0;
166     break;
167   case 2:
168     ftprintf(stderr,
169              T("%s: file too large for memory-mapping")
170                  T(", switching to streaming\n"),
171              filename);
172     result = processStream(filename, entParser);
173     break;
174   }
175   free(s);
176   XML_ParserFree(entParser);
177   return result;
178 }
179 
180 static int
181 processStream(const XML_Char *filename, XML_Parser parser) {
182   /* passing NULL for filename means read input from stdin */
183   int fd = 0; /* 0 is the fileno for stdin */
184 
185   if (filename != NULL) {
186     fd = topen(filename, O_BINARY | O_RDONLY);
187     if (fd < 0) {
188       tperror(filename);
189       return 0;
190     }
191   }
192   for (;;) {
193     int nread;
194     char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes);
195     if (! buf) {
196       if (filename != NULL)
197         close(fd);
198       ftprintf(stderr, T("%s: out of memory\n"),
199                filename != NULL ? filename : T("xmlwf"));
200       return 0;
201     }
202     nread = read(fd, buf, g_read_size_bytes);
203     if (nread < 0) {
204       tperror(filename != NULL ? filename : T("STDIN"));
205       if (filename != NULL)
206         close(fd);
207       return 0;
208     }
209     if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
210       reportError(parser, filename != NULL ? filename : T("STDIN"));
211       if (filename != NULL)
212         close(fd);
213       return 0;
214     }
215     if (nread == 0) {
216       if (filename != NULL)
217         close(fd);
218       break;
219       ;
220     }
221   }
222   return 1;
223 }
224 
225 static int
226 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
227                         const XML_Char *base, const XML_Char *systemId,
228                         const XML_Char *publicId) {
229   XML_Char *s;
230   const XML_Char *filename;
231   int ret;
232   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
233   UNUSED_P(publicId);
234   filename = resolveSystemId(base, systemId, &s);
235   XML_SetBase(entParser, filename);
236   ret = processStream(filename, entParser);
237   free(s);
238   XML_ParserFree(entParser);
239   return ret;
240 }
241 
242 int
243 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
244   int result;
245 
246   if (! XML_SetBase(parser, filename)) {
247     ftprintf(stderr, T("%s: out of memory"), filename);
248     exit(1);
249   }
250 
251   if (flags & XML_EXTERNAL_ENTITIES)
252     XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
253                                                 ? externalEntityRefFilemap
254                                                 : externalEntityRefStream);
255   if (flags & XML_MAP_FILE) {
256     int filemapRes;
257     PROCESS_ARGS args;
258     args.retPtr = &result;
259     args.parser = parser;
260     filemapRes = filemap(filename, processFile, &args);
261     switch (filemapRes) {
262     case 0:
263       result = 0;
264       break;
265     case 2:
266       ftprintf(stderr,
267                T("%s: file too large for memory-mapping")
268                    T(", switching to streaming\n"),
269                filename);
270       result = processStream(filename, parser);
271       break;
272     }
273   } else
274     result = processStream(filename, parser);
275   return result;
276 }
277