xref: /freebsd/contrib/expat/xmlwf/xmlfile.c (revision b5a3a89c50671a1ad29e7c43fe15e7b16feac239)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14    Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Copyright (c) 2021      Dong-hee Na <donghee.na@python.org>
18    Licensed under the MIT license:
19 
20    Permission is  hereby granted,  free of charge,  to any  person obtaining
21    a  copy  of  this  software   and  associated  documentation  files  (the
22    "Software"),  to  deal in  the  Software  without restriction,  including
23    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
24    distribute, sublicense, and/or sell copies of the Software, and to permit
25    persons  to whom  the Software  is  furnished to  do so,  subject to  the
26    following conditions:
27 
28    The above copyright  notice and this permission notice  shall be included
29    in all copies or substantial portions of the Software.
30 
31    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
32    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
33    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
36    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37    USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39 
40 #include <expat_config.h>
41 
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stddef.h>
45 #include <string.h>
46 #include <fcntl.h>
47 
48 #ifdef _WIN32
49 #  include "winconfig.h"
50 #endif
51 
52 #include "expat.h"
53 #include "internal.h" /* for UNUSED_P only */
54 #include "xmlfile.h"
55 #include "xmltchar.h"
56 #include "filemap.h"
57 
58 #if defined(_MSC_VER)
59 #  include <io.h>
60 #endif
61 
62 #ifdef HAVE_UNISTD_H
63 #  include <unistd.h>
64 #endif
65 
/* Make sure O_BINARY can always be OR-ed into the open flags used by
   processStream: reuse _O_BINARY where only that spelling exists, and
   otherwise define it as 0 so that it has no effect. */
#ifndef O_BINARY
#  ifdef _O_BINARY
#    define O_BINARY _O_BINARY
#  else
#    define O_BINARY 0
#  endif
#endif
73 
/* Number of bytes processStream requests from XML_GetBuffer and read() per
   iteration.  Builds with _DEBUG defined use a much smaller value
   (NOTE(review): presumably to exercise chunk-boundary handling -- confirm). */
#ifdef _DEBUG
#  define READ_SIZE 16
#else
#  define READ_SIZE (1024 * 8)
#endif
79 
/* Bundle handed to processFile through the opaque argument of filemap(). */
typedef struct {
  XML_Parser parser; /* parser that processFile feeds the mapped data to */
  int *retPtr; /* processFile stores 1 here on success, 0 on a parse error */
} PROCESS_ARGS;
84 
/* Declared ahead of its definition because externalEntityRefFilemap, which
   appears earlier in this file, falls back to it. */
static int processStream(const XML_Char *filename, XML_Parser parser);
86 
87 static void
88 reportError(XML_Parser parser, const XML_Char *filename) {
89   enum XML_Error code = XML_GetErrorCode(parser);
90   const XML_Char *message = XML_ErrorString(code);
91   if (message)
92     ftprintf(stdout,
93              T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
94                  T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
95              filename, XML_GetErrorLineNumber(parser),
96              XML_GetErrorColumnNumber(parser), message);
97   else
98     ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
99 }
100 
101 /* This implementation will give problems on files larger than INT_MAX. */
102 static void
103 processFile(const void *data, size_t size, const XML_Char *filename,
104             void *args) {
105   XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
106   int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
107   if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
108     reportError(parser, filename);
109     *retPtr = 0;
110   } else
111     *retPtr = 1;
112 }
113 
114 #if defined(_WIN32)
115 
116 static int
117 isAsciiLetter(XML_Char c) {
118   return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
119 }
120 
121 #endif /* _WIN32 */
122 
123 static const XML_Char *
124 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
125                 XML_Char **toFree) {
126   XML_Char *s;
127   *toFree = 0;
128   if (! base || *systemId == T('/')
129 #if defined(_WIN32)
130       || *systemId == T('\\')
131       || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
132 #endif
133   )
134     return systemId;
135   *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
136                                * sizeof(XML_Char));
137   if (! *toFree)
138     return systemId;
139   tcscpy(*toFree, base);
140   s = *toFree;
141   if (tcsrchr(s, T('/')))
142     s = tcsrchr(s, T('/')) + 1;
143 #if defined(_WIN32)
144   if (tcsrchr(s, T('\\')))
145     s = tcsrchr(s, T('\\')) + 1;
146 #endif
147   tcscpy(s, systemId);
148   return *toFree;
149 }
150 
151 static int
152 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
153                          const XML_Char *base, const XML_Char *systemId,
154                          const XML_Char *publicId) {
155   int result;
156   XML_Char *s;
157   const XML_Char *filename;
158   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
159   int filemapRes;
160   PROCESS_ARGS args;
161   UNUSED_P(publicId);
162   args.retPtr = &result;
163   args.parser = entParser;
164   filename = resolveSystemId(base, systemId, &s);
165   XML_SetBase(entParser, filename);
166   filemapRes = filemap(filename, processFile, &args);
167   switch (filemapRes) {
168   case 0:
169     result = 0;
170     break;
171   case 2:
172     ftprintf(stderr,
173              T("%s: file too large for memory-mapping")
174                  T(", switching to streaming\n"),
175              filename);
176     result = processStream(filename, entParser);
177     break;
178   }
179   free(s);
180   XML_ParserFree(entParser);
181   return result;
182 }
183 
184 static int
185 processStream(const XML_Char *filename, XML_Parser parser) {
186   /* passing NULL for filename means read input from stdin */
187   int fd = 0; /* 0 is the fileno for stdin */
188 
189   if (filename != NULL) {
190     fd = topen(filename, O_BINARY | O_RDONLY);
191     if (fd < 0) {
192       tperror(filename);
193       return 0;
194     }
195   }
196   for (;;) {
197     int nread;
198     char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
199     if (! buf) {
200       if (filename != NULL)
201         close(fd);
202       ftprintf(stderr, T("%s: out of memory\n"),
203                filename != NULL ? filename : T("xmlwf"));
204       return 0;
205     }
206     nread = read(fd, buf, READ_SIZE);
207     if (nread < 0) {
208       tperror(filename != NULL ? filename : T("STDIN"));
209       if (filename != NULL)
210         close(fd);
211       return 0;
212     }
213     if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
214       reportError(parser, filename != NULL ? filename : T("STDIN"));
215       if (filename != NULL)
216         close(fd);
217       return 0;
218     }
219     if (nread == 0) {
220       if (filename != NULL)
221         close(fd);
222       break;
223       ;
224     }
225   }
226   return 1;
227 }
228 
229 static int
230 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
231                         const XML_Char *base, const XML_Char *systemId,
232                         const XML_Char *publicId) {
233   XML_Char *s;
234   const XML_Char *filename;
235   int ret;
236   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
237   UNUSED_P(publicId);
238   filename = resolveSystemId(base, systemId, &s);
239   XML_SetBase(entParser, filename);
240   ret = processStream(filename, entParser);
241   free(s);
242   XML_ParserFree(entParser);
243   return ret;
244 }
245 
246 int
247 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
248   int result;
249 
250   if (! XML_SetBase(parser, filename)) {
251     ftprintf(stderr, T("%s: out of memory"), filename);
252     exit(1);
253   }
254 
255   if (flags & XML_EXTERNAL_ENTITIES)
256     XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
257                                                 ? externalEntityRefFilemap
258                                                 : externalEntityRefStream);
259   if (flags & XML_MAP_FILE) {
260     int filemapRes;
261     PROCESS_ARGS args;
262     args.retPtr = &result;
263     args.parser = parser;
264     filemapRes = filemap(filename, processFile, &args);
265     switch (filemapRes) {
266     case 0:
267       result = 0;
268       break;
269     case 2:
270       ftprintf(stderr,
271                T("%s: file too large for memory-mapping")
272                    T(", switching to streaming\n"),
273                filename);
274       result = processStream(filename, parser);
275       break;
276     }
277   } else
278     result = processStream(filename, parser);
279   return result;
280 }
281