xref: /freebsd/contrib/expat/xmlwf/xmlfile.c (revision 4543ef516683042d46f3bd3bb8a4f3f746e00499)
/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
   Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
395bb6a25fSPoul-Henning Kamp 
40*4543ef51SXin LI #include "expat_config.h"
41cc68614dSXin LI 
425bb6a25fSPoul-Henning Kamp #include <stdio.h>
435bb6a25fSPoul-Henning Kamp #include <stdlib.h>
445bb6a25fSPoul-Henning Kamp #include <stddef.h>
455bb6a25fSPoul-Henning Kamp #include <string.h>
465bb6a25fSPoul-Henning Kamp #include <fcntl.h>
47220ed979SColeman Kane 
480a48773fSEric van Gyzen #ifdef _WIN32
495bb6a25fSPoul-Henning Kamp #  include "winconfig.h"
50cc68614dSXin LI #endif
51220ed979SColeman Kane 
525bb6a25fSPoul-Henning Kamp #include "expat.h"
53be8aff81SXin LI #include "internal.h" /* for UNUSED_P only */
545bb6a25fSPoul-Henning Kamp #include "xmlfile.h"
555bb6a25fSPoul-Henning Kamp #include "xmltchar.h"
565bb6a25fSPoul-Henning Kamp #include "filemap.h"
575bb6a25fSPoul-Henning Kamp 
580a48773fSEric van Gyzen #if defined(_MSC_VER)
595bb6a25fSPoul-Henning Kamp #  include <io.h>
605bb6a25fSPoul-Henning Kamp #endif
615bb6a25fSPoul-Henning Kamp 
625bb6a25fSPoul-Henning Kamp #ifdef HAVE_UNISTD_H
635bb6a25fSPoul-Henning Kamp #  include <unistd.h>
645bb6a25fSPoul-Henning Kamp #endif
655bb6a25fSPoul-Henning Kamp 
665bb6a25fSPoul-Henning Kamp #ifndef O_BINARY
675bb6a25fSPoul-Henning Kamp #  ifdef _O_BINARY
685bb6a25fSPoul-Henning Kamp #    define O_BINARY _O_BINARY
695bb6a25fSPoul-Henning Kamp #  else
705bb6a25fSPoul-Henning Kamp #    define O_BINARY 0
715bb6a25fSPoul-Henning Kamp #  endif
725bb6a25fSPoul-Henning Kamp #endif
735bb6a25fSPoul-Henning Kamp 
/* Number of bytes requested from the parser's buffer, and read from the
   input, on each iteration of the streaming loop in processStream(). */
int g_read_size_bytes = 8 * 1024;
755bb6a25fSPoul-Henning Kamp 
765bb6a25fSPoul-Henning Kamp typedef struct {
775bb6a25fSPoul-Henning Kamp   XML_Parser parser;
785bb6a25fSPoul-Henning Kamp   int *retPtr;
795bb6a25fSPoul-Henning Kamp } PROCESS_ARGS;
805bb6a25fSPoul-Henning Kamp 
816b2c1e49SXin LI static int processStream(const XML_Char *filename, XML_Parser parser);
820a48773fSEric van Gyzen 
835bb6a25fSPoul-Henning Kamp static void
reportError(XML_Parser parser,const XML_Char * filename)846b2c1e49SXin LI reportError(XML_Parser parser, const XML_Char *filename) {
85220ed979SColeman Kane   enum XML_Error code = XML_GetErrorCode(parser);
865bb6a25fSPoul-Henning Kamp   const XML_Char *message = XML_ErrorString(code);
875bb6a25fSPoul-Henning Kamp   if (message)
880a48773fSEric van Gyzen     ftprintf(stdout,
896b2c1e49SXin LI              T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
906b2c1e49SXin LI                  T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
916b2c1e49SXin LI              filename, XML_GetErrorLineNumber(parser),
926b2c1e49SXin LI              XML_GetErrorColumnNumber(parser), message);
935bb6a25fSPoul-Henning Kamp   else
945bb6a25fSPoul-Henning Kamp     ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
955bb6a25fSPoul-Henning Kamp }
965bb6a25fSPoul-Henning Kamp 
97220ed979SColeman Kane /* This implementation will give problems on files larger than INT_MAX. */
985bb6a25fSPoul-Henning Kamp static void
processFile(const void * data,size_t size,const XML_Char * filename,void * args)996b2c1e49SXin LI processFile(const void *data, size_t size, const XML_Char *filename,
1006b2c1e49SXin LI             void *args) {
1015bb6a25fSPoul-Henning Kamp   XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
1025bb6a25fSPoul-Henning Kamp   int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
103220ed979SColeman Kane   if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
1045bb6a25fSPoul-Henning Kamp     reportError(parser, filename);
1055bb6a25fSPoul-Henning Kamp     *retPtr = 0;
1066b2c1e49SXin LI   } else
1075bb6a25fSPoul-Henning Kamp     *retPtr = 1;
1085bb6a25fSPoul-Henning Kamp }
1095bb6a25fSPoul-Henning Kamp 
#if defined(_WIN32)

/* Return non-zero when `c` is an ASCII letter ('a'..'z' or 'A'..'Z').
   Used by resolveSystemId() to recognize a drive-letter prefix. */
static int
isAsciiLetter(XML_Char c) {
  const int isLower = (c >= T('a') && c <= T('z'));
  const int isUpper = (c >= T('A') && c <= T('Z'));
  return isLower || isUpper;
}

#endif /* _WIN32 */
1185bb6a25fSPoul-Henning Kamp 
1195bb6a25fSPoul-Henning Kamp static const XML_Char *
resolveSystemId(const XML_Char * base,const XML_Char * systemId,XML_Char ** toFree)1205bb6a25fSPoul-Henning Kamp resolveSystemId(const XML_Char *base, const XML_Char *systemId,
1216b2c1e49SXin LI                 XML_Char **toFree) {
1225bb6a25fSPoul-Henning Kamp   XML_Char *s;
1235bb6a25fSPoul-Henning Kamp   *toFree = 0;
1246b2c1e49SXin LI   if (! base || *systemId == T('/')
1250a48773fSEric van Gyzen #if defined(_WIN32)
1265bb6a25fSPoul-Henning Kamp       || *systemId == T('\\')
1275bb6a25fSPoul-Henning Kamp       || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
1285bb6a25fSPoul-Henning Kamp #endif
1295bb6a25fSPoul-Henning Kamp   )
1305bb6a25fSPoul-Henning Kamp     return systemId;
1315bb6a25fSPoul-Henning Kamp   *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
1325bb6a25fSPoul-Henning Kamp                                * sizeof(XML_Char));
1335bb6a25fSPoul-Henning Kamp   if (! *toFree)
1345bb6a25fSPoul-Henning Kamp     return systemId;
1355bb6a25fSPoul-Henning Kamp   tcscpy(*toFree, base);
1365bb6a25fSPoul-Henning Kamp   s = *toFree;
1375bb6a25fSPoul-Henning Kamp   if (tcsrchr(s, T('/')))
1385bb6a25fSPoul-Henning Kamp     s = tcsrchr(s, T('/')) + 1;
1390a48773fSEric van Gyzen #if defined(_WIN32)
1405bb6a25fSPoul-Henning Kamp   if (tcsrchr(s, T('\\')))
1415bb6a25fSPoul-Henning Kamp     s = tcsrchr(s, T('\\')) + 1;
1425bb6a25fSPoul-Henning Kamp #endif
1435bb6a25fSPoul-Henning Kamp   tcscpy(s, systemId);
1445bb6a25fSPoul-Henning Kamp   return *toFree;
1455bb6a25fSPoul-Henning Kamp }
1465bb6a25fSPoul-Henning Kamp 
1475bb6a25fSPoul-Henning Kamp static int
externalEntityRefFilemap(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)1486b2c1e49SXin LI externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
1496b2c1e49SXin LI                          const XML_Char *base, const XML_Char *systemId,
1506b2c1e49SXin LI                          const XML_Char *publicId) {
1515bb6a25fSPoul-Henning Kamp   int result;
1525bb6a25fSPoul-Henning Kamp   XML_Char *s;
1535bb6a25fSPoul-Henning Kamp   const XML_Char *filename;
1545bb6a25fSPoul-Henning Kamp   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
1550a48773fSEric van Gyzen   int filemapRes;
1565bb6a25fSPoul-Henning Kamp   PROCESS_ARGS args;
1576b2c1e49SXin LI   UNUSED_P(publicId);
1585bb6a25fSPoul-Henning Kamp   args.retPtr = &result;
1595bb6a25fSPoul-Henning Kamp   args.parser = entParser;
1605bb6a25fSPoul-Henning Kamp   filename = resolveSystemId(base, systemId, &s);
1615bb6a25fSPoul-Henning Kamp   XML_SetBase(entParser, filename);
1620a48773fSEric van Gyzen   filemapRes = filemap(filename, processFile, &args);
1630a48773fSEric van Gyzen   switch (filemapRes) {
1640a48773fSEric van Gyzen   case 0:
1655bb6a25fSPoul-Henning Kamp     result = 0;
1660a48773fSEric van Gyzen     break;
1670a48773fSEric van Gyzen   case 2:
1686b2c1e49SXin LI     ftprintf(stderr,
1696b2c1e49SXin LI              T("%s: file too large for memory-mapping")
1706b2c1e49SXin LI                  T(", switching to streaming\n"),
1716b2c1e49SXin LI              filename);
1720a48773fSEric van Gyzen     result = processStream(filename, entParser);
1730a48773fSEric van Gyzen     break;
1740a48773fSEric van Gyzen   }
1755bb6a25fSPoul-Henning Kamp   free(s);
1765bb6a25fSPoul-Henning Kamp   XML_ParserFree(entParser);
1775bb6a25fSPoul-Henning Kamp   return result;
1785bb6a25fSPoul-Henning Kamp }
1795bb6a25fSPoul-Henning Kamp 
1805bb6a25fSPoul-Henning Kamp static int
processStream(const XML_Char * filename,XML_Parser parser)1816b2c1e49SXin LI processStream(const XML_Char *filename, XML_Parser parser) {
182cc68614dSXin LI   /* passing NULL for filename means read input from stdin */
1835bb6a25fSPoul-Henning Kamp   int fd = 0; /* 0 is the fileno for stdin */
1845bb6a25fSPoul-Henning Kamp 
1855bb6a25fSPoul-Henning Kamp   if (filename != NULL) {
1865bb6a25fSPoul-Henning Kamp     fd = topen(filename, O_BINARY | O_RDONLY);
1875bb6a25fSPoul-Henning Kamp     if (fd < 0) {
1885bb6a25fSPoul-Henning Kamp       tperror(filename);
1895bb6a25fSPoul-Henning Kamp       return 0;
1905bb6a25fSPoul-Henning Kamp     }
1915bb6a25fSPoul-Henning Kamp   }
1925bb6a25fSPoul-Henning Kamp   for (;;) {
1935bb6a25fSPoul-Henning Kamp     int nread;
194*4543ef51SXin LI     char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes);
1955bb6a25fSPoul-Henning Kamp     if (! buf) {
1965bb6a25fSPoul-Henning Kamp       if (filename != NULL)
1975bb6a25fSPoul-Henning Kamp         close(fd);
1985bb6a25fSPoul-Henning Kamp       ftprintf(stderr, T("%s: out of memory\n"),
1990a48773fSEric van Gyzen                filename != NULL ? filename : T("xmlwf"));
2005bb6a25fSPoul-Henning Kamp       return 0;
2015bb6a25fSPoul-Henning Kamp     }
202*4543ef51SXin LI     nread = read(fd, buf, g_read_size_bytes);
2035bb6a25fSPoul-Henning Kamp     if (nread < 0) {
2040a48773fSEric van Gyzen       tperror(filename != NULL ? filename : T("STDIN"));
2055bb6a25fSPoul-Henning Kamp       if (filename != NULL)
2065bb6a25fSPoul-Henning Kamp         close(fd);
2075bb6a25fSPoul-Henning Kamp       return 0;
2085bb6a25fSPoul-Henning Kamp     }
2095bb6a25fSPoul-Henning Kamp     if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
2100a48773fSEric van Gyzen       reportError(parser, filename != NULL ? filename : T("STDIN"));
2115bb6a25fSPoul-Henning Kamp       if (filename != NULL)
2125bb6a25fSPoul-Henning Kamp         close(fd);
2135bb6a25fSPoul-Henning Kamp       return 0;
2145bb6a25fSPoul-Henning Kamp     }
2155bb6a25fSPoul-Henning Kamp     if (nread == 0) {
2165bb6a25fSPoul-Henning Kamp       if (filename != NULL)
2175bb6a25fSPoul-Henning Kamp         close(fd);
2186b2c1e49SXin LI       break;
2196b2c1e49SXin LI       ;
2205bb6a25fSPoul-Henning Kamp     }
2215bb6a25fSPoul-Henning Kamp   }
2225bb6a25fSPoul-Henning Kamp   return 1;
2235bb6a25fSPoul-Henning Kamp }
2245bb6a25fSPoul-Henning Kamp 
2255bb6a25fSPoul-Henning Kamp static int
externalEntityRefStream(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)2266b2c1e49SXin LI externalEntityRefStream(XML_Parser parser, const XML_Char *context,
2276b2c1e49SXin LI                         const XML_Char *base, const XML_Char *systemId,
2286b2c1e49SXin LI                         const XML_Char *publicId) {
2295bb6a25fSPoul-Henning Kamp   XML_Char *s;
2305bb6a25fSPoul-Henning Kamp   const XML_Char *filename;
2315bb6a25fSPoul-Henning Kamp   int ret;
2325bb6a25fSPoul-Henning Kamp   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
2336b2c1e49SXin LI   UNUSED_P(publicId);
2345bb6a25fSPoul-Henning Kamp   filename = resolveSystemId(base, systemId, &s);
2355bb6a25fSPoul-Henning Kamp   XML_SetBase(entParser, filename);
2365bb6a25fSPoul-Henning Kamp   ret = processStream(filename, entParser);
2375bb6a25fSPoul-Henning Kamp   free(s);
2385bb6a25fSPoul-Henning Kamp   XML_ParserFree(entParser);
2395bb6a25fSPoul-Henning Kamp   return ret;
2405bb6a25fSPoul-Henning Kamp }
2415bb6a25fSPoul-Henning Kamp 
2425bb6a25fSPoul-Henning Kamp int
XML_ProcessFile(XML_Parser parser,const XML_Char * filename,unsigned flags)2436b2c1e49SXin LI XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
2445bb6a25fSPoul-Henning Kamp   int result;
2455bb6a25fSPoul-Henning Kamp 
2465bb6a25fSPoul-Henning Kamp   if (! XML_SetBase(parser, filename)) {
2475bb6a25fSPoul-Henning Kamp     ftprintf(stderr, T("%s: out of memory"), filename);
2485bb6a25fSPoul-Henning Kamp     exit(1);
2495bb6a25fSPoul-Henning Kamp   }
2505bb6a25fSPoul-Henning Kamp 
2515bb6a25fSPoul-Henning Kamp   if (flags & XML_EXTERNAL_ENTITIES)
2526b2c1e49SXin LI     XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
2535bb6a25fSPoul-Henning Kamp                                                 ? externalEntityRefFilemap
2545bb6a25fSPoul-Henning Kamp                                                 : externalEntityRefStream);
2555bb6a25fSPoul-Henning Kamp   if (flags & XML_MAP_FILE) {
2560a48773fSEric van Gyzen     int filemapRes;
2575bb6a25fSPoul-Henning Kamp     PROCESS_ARGS args;
2585bb6a25fSPoul-Henning Kamp     args.retPtr = &result;
2595bb6a25fSPoul-Henning Kamp     args.parser = parser;
2600a48773fSEric van Gyzen     filemapRes = filemap(filename, processFile, &args);
2610a48773fSEric van Gyzen     switch (filemapRes) {
2620a48773fSEric van Gyzen     case 0:
2635bb6a25fSPoul-Henning Kamp       result = 0;
2640a48773fSEric van Gyzen       break;
2650a48773fSEric van Gyzen     case 2:
2666b2c1e49SXin LI       ftprintf(stderr,
2676b2c1e49SXin LI                T("%s: file too large for memory-mapping")
2686b2c1e49SXin LI                    T(", switching to streaming\n"),
2696b2c1e49SXin LI                filename);
2700a48773fSEric van Gyzen       result = processStream(filename, parser);
2710a48773fSEric van Gyzen       break;
2720a48773fSEric van Gyzen     }
2736b2c1e49SXin LI   } else
2745bb6a25fSPoul-Henning Kamp     result = processStream(filename, parser);
2755bb6a25fSPoul-Henning Kamp   return result;
2765bb6a25fSPoul-Henning Kamp }
277