/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
   Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020      Joe Orton <jorton@redhat.com>
   Copyright (c) 2020      Kleber Tarcísio <klebertarcisio@yahoo.com.br>
   Copyright (c) 2021      Tim Bray <tbray@textuality.com>
   Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
   Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
435bb6a25fSPoul-Henning Kamp
44*4543ef51SXin LI #include "expat_config.h"
45cc68614dSXin LI
460a48773fSEric van Gyzen #include <assert.h>
475bb6a25fSPoul-Henning Kamp #include <stdio.h>
485bb6a25fSPoul-Henning Kamp #include <stdlib.h>
495bb6a25fSPoul-Henning Kamp #include <stddef.h>
505bb6a25fSPoul-Henning Kamp #include <string.h>
51cc68614dSXin LI #include <math.h> /* for isnan */
52cc68614dSXin LI #include <errno.h>
535bb6a25fSPoul-Henning Kamp
545bb6a25fSPoul-Henning Kamp #include "expat.h"
555bb6a25fSPoul-Henning Kamp #include "codepage.h"
56be8aff81SXin LI #include "internal.h" /* for UNUSED_P only */
575bb6a25fSPoul-Henning Kamp #include "xmlfile.h"
585bb6a25fSPoul-Henning Kamp #include "xmltchar.h"
595bb6a25fSPoul-Henning Kamp
605bb6a25fSPoul-Henning Kamp #ifdef _MSC_VER
615bb6a25fSPoul-Henning Kamp # include <crtdbg.h>
625bb6a25fSPoul-Henning Kamp #endif
635bb6a25fSPoul-Henning Kamp
640a48773fSEric van Gyzen #ifdef XML_UNICODE
650a48773fSEric van Gyzen # include <wchar.h>
66220ed979SColeman Kane #endif
67220ed979SColeman Kane
/* Exit status values; each numeric value is pinned explicitly. */
enum ExitCode {
  XMLWF_EXIT_SUCCESS = 0,
  XMLWF_EXIT_INTERNAL_ERROR = 1,
  XMLWF_EXIT_NOT_WELLFORMED = 2,
  XMLWF_EXIT_OUTPUT_ERROR = 3,
  XMLWF_EXIT_USAGE_ERROR = 4
};
75cc68614dSXin LI
760a48773fSEric van Gyzen /* Structures for handler user data */
770a48773fSEric van Gyzen typedef struct NotationList {
780a48773fSEric van Gyzen struct NotationList *next;
790a48773fSEric van Gyzen const XML_Char *notationName;
800a48773fSEric van Gyzen const XML_Char *systemId;
810a48773fSEric van Gyzen const XML_Char *publicId;
820a48773fSEric van Gyzen } NotationList;
830a48773fSEric van Gyzen
840a48773fSEric van Gyzen typedef struct xmlwfUserData {
850a48773fSEric van Gyzen FILE *fp;
860a48773fSEric van Gyzen NotationList *notationListHead;
870a48773fSEric van Gyzen const XML_Char *currentDoctypeName;
880a48773fSEric van Gyzen } XmlwfUserData;
890a48773fSEric van Gyzen
/* Separator character looked for between the namespace part and the local
   part of names by the namespace-aware handlers in this file.
   The value U+0001 is chosen because it ensures proper sorting. */

#define NSSEP T('\001')
935bb6a25fSPoul-Henning Kamp
94220ed979SColeman Kane static void XMLCALL
characterData(void * userData,const XML_Char * s,int len)956b2c1e49SXin LI characterData(void *userData, const XML_Char *s, int len) {
960a48773fSEric van Gyzen FILE *fp = ((XmlwfUserData *)userData)->fp;
975bb6a25fSPoul-Henning Kamp for (; len > 0; --len, ++s) {
985bb6a25fSPoul-Henning Kamp switch (*s) {
995bb6a25fSPoul-Henning Kamp case T('&'):
1005bb6a25fSPoul-Henning Kamp fputts(T("&"), fp);
1015bb6a25fSPoul-Henning Kamp break;
1025bb6a25fSPoul-Henning Kamp case T('<'):
1035bb6a25fSPoul-Henning Kamp fputts(T("<"), fp);
1045bb6a25fSPoul-Henning Kamp break;
1055bb6a25fSPoul-Henning Kamp case T('>'):
1065bb6a25fSPoul-Henning Kamp fputts(T(">"), fp);
1075bb6a25fSPoul-Henning Kamp break;
1085bb6a25fSPoul-Henning Kamp #ifdef W3C14N
1095bb6a25fSPoul-Henning Kamp case 13:
1105bb6a25fSPoul-Henning Kamp fputts(T("
"), fp);
1115bb6a25fSPoul-Henning Kamp break;
1125bb6a25fSPoul-Henning Kamp #else
1135bb6a25fSPoul-Henning Kamp case T('"'):
1145bb6a25fSPoul-Henning Kamp fputts(T("""), fp);
1155bb6a25fSPoul-Henning Kamp break;
1165bb6a25fSPoul-Henning Kamp case 9:
1175bb6a25fSPoul-Henning Kamp case 10:
1185bb6a25fSPoul-Henning Kamp case 13:
1195bb6a25fSPoul-Henning Kamp ftprintf(fp, T("&#%d;"), *s);
1205bb6a25fSPoul-Henning Kamp break;
1215bb6a25fSPoul-Henning Kamp #endif
1225bb6a25fSPoul-Henning Kamp default:
1235bb6a25fSPoul-Henning Kamp puttc(*s, fp);
1245bb6a25fSPoul-Henning Kamp break;
1255bb6a25fSPoul-Henning Kamp }
1265bb6a25fSPoul-Henning Kamp }
1275bb6a25fSPoul-Henning Kamp }
1285bb6a25fSPoul-Henning Kamp
1295bb6a25fSPoul-Henning Kamp static void
attributeValue(FILE * fp,const XML_Char * s)1306b2c1e49SXin LI attributeValue(FILE *fp, const XML_Char *s) {
1315bb6a25fSPoul-Henning Kamp puttc(T('='), fp);
1325bb6a25fSPoul-Henning Kamp puttc(T('"'), fp);
1330a48773fSEric van Gyzen assert(s);
1345bb6a25fSPoul-Henning Kamp for (;;) {
1355bb6a25fSPoul-Henning Kamp switch (*s) {
1365bb6a25fSPoul-Henning Kamp case 0:
1375bb6a25fSPoul-Henning Kamp case NSSEP:
1385bb6a25fSPoul-Henning Kamp puttc(T('"'), fp);
1395bb6a25fSPoul-Henning Kamp return;
1405bb6a25fSPoul-Henning Kamp case T('&'):
1415bb6a25fSPoul-Henning Kamp fputts(T("&"), fp);
1425bb6a25fSPoul-Henning Kamp break;
1435bb6a25fSPoul-Henning Kamp case T('<'):
1445bb6a25fSPoul-Henning Kamp fputts(T("<"), fp);
1455bb6a25fSPoul-Henning Kamp break;
1465bb6a25fSPoul-Henning Kamp case T('"'):
1475bb6a25fSPoul-Henning Kamp fputts(T("""), fp);
1485bb6a25fSPoul-Henning Kamp break;
1495bb6a25fSPoul-Henning Kamp #ifdef W3C14N
1505bb6a25fSPoul-Henning Kamp case 9:
1515bb6a25fSPoul-Henning Kamp fputts(T("	"), fp);
1525bb6a25fSPoul-Henning Kamp break;
1535bb6a25fSPoul-Henning Kamp case 10:
1545bb6a25fSPoul-Henning Kamp fputts(T("
"), fp);
1555bb6a25fSPoul-Henning Kamp break;
1565bb6a25fSPoul-Henning Kamp case 13:
1575bb6a25fSPoul-Henning Kamp fputts(T("
"), fp);
1585bb6a25fSPoul-Henning Kamp break;
1595bb6a25fSPoul-Henning Kamp #else
1605bb6a25fSPoul-Henning Kamp case T('>'):
1615bb6a25fSPoul-Henning Kamp fputts(T(">"), fp);
1625bb6a25fSPoul-Henning Kamp break;
1635bb6a25fSPoul-Henning Kamp case 9:
1645bb6a25fSPoul-Henning Kamp case 10:
1655bb6a25fSPoul-Henning Kamp case 13:
1665bb6a25fSPoul-Henning Kamp ftprintf(fp, T("&#%d;"), *s);
1675bb6a25fSPoul-Henning Kamp break;
1685bb6a25fSPoul-Henning Kamp #endif
1695bb6a25fSPoul-Henning Kamp default:
1705bb6a25fSPoul-Henning Kamp puttc(*s, fp);
1715bb6a25fSPoul-Henning Kamp break;
1725bb6a25fSPoul-Henning Kamp }
1735bb6a25fSPoul-Henning Kamp s++;
1745bb6a25fSPoul-Henning Kamp }
1755bb6a25fSPoul-Henning Kamp }
1765bb6a25fSPoul-Henning Kamp
1775bb6a25fSPoul-Henning Kamp /* Lexicographically comparing UTF-8 encoded attribute values,
1785bb6a25fSPoul-Henning Kamp is equivalent to lexicographically comparing based on the character number. */
1795bb6a25fSPoul-Henning Kamp
1805bb6a25fSPoul-Henning Kamp static int
attcmp(const void * att1,const void * att2)1816b2c1e49SXin LI attcmp(const void *att1, const void *att2) {
182*4543ef51SXin LI return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
1835bb6a25fSPoul-Henning Kamp }
1845bb6a25fSPoul-Henning Kamp
185220ed979SColeman Kane static void XMLCALL
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)1866b2c1e49SXin LI startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
1875bb6a25fSPoul-Henning Kamp int nAtts;
1885bb6a25fSPoul-Henning Kamp const XML_Char **p;
1890a48773fSEric van Gyzen FILE *fp = ((XmlwfUserData *)userData)->fp;
1905bb6a25fSPoul-Henning Kamp puttc(T('<'), fp);
1915bb6a25fSPoul-Henning Kamp fputts(name, fp);
1925bb6a25fSPoul-Henning Kamp
1935bb6a25fSPoul-Henning Kamp p = atts;
1945bb6a25fSPoul-Henning Kamp while (*p)
1955bb6a25fSPoul-Henning Kamp ++p;
196220ed979SColeman Kane nAtts = (int)((p - atts) >> 1);
1975bb6a25fSPoul-Henning Kamp if (nAtts > 1)
1985bb6a25fSPoul-Henning Kamp qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
1995bb6a25fSPoul-Henning Kamp while (*atts) {
2005bb6a25fSPoul-Henning Kamp puttc(T(' '), fp);
2015bb6a25fSPoul-Henning Kamp fputts(*atts++, fp);
2025bb6a25fSPoul-Henning Kamp attributeValue(fp, *atts);
2035bb6a25fSPoul-Henning Kamp atts++;
2045bb6a25fSPoul-Henning Kamp }
2055bb6a25fSPoul-Henning Kamp puttc(T('>'), fp);
2065bb6a25fSPoul-Henning Kamp }
2075bb6a25fSPoul-Henning Kamp
208220ed979SColeman Kane static void XMLCALL
endElement(void * userData,const XML_Char * name)2096b2c1e49SXin LI endElement(void *userData, const XML_Char *name) {
2100a48773fSEric van Gyzen FILE *fp = ((XmlwfUserData *)userData)->fp;
2115bb6a25fSPoul-Henning Kamp puttc(T('<'), fp);
2125bb6a25fSPoul-Henning Kamp puttc(T('/'), fp);
2135bb6a25fSPoul-Henning Kamp fputts(name, fp);
2145bb6a25fSPoul-Henning Kamp puttc(T('>'), fp);
2155bb6a25fSPoul-Henning Kamp }
2165bb6a25fSPoul-Henning Kamp
2175bb6a25fSPoul-Henning Kamp static int
nsattcmp(const void * p1,const void * p2)2186b2c1e49SXin LI nsattcmp(const void *p1, const void *p2) {
219*4543ef51SXin LI const XML_Char *att1 = *(const XML_Char *const *)p1;
220*4543ef51SXin LI const XML_Char *att2 = *(const XML_Char *const *)p2;
2215bb6a25fSPoul-Henning Kamp int sep1 = (tcsrchr(att1, NSSEP) != 0);
22271f0c44aSXin LI int sep2 = (tcsrchr(att2, NSSEP) != 0);
2235bb6a25fSPoul-Henning Kamp if (sep1 != sep2)
2245bb6a25fSPoul-Henning Kamp return sep1 - sep2;
2255bb6a25fSPoul-Henning Kamp return tcscmp(att1, att2);
2265bb6a25fSPoul-Henning Kamp }
2275bb6a25fSPoul-Henning Kamp
228220ed979SColeman Kane static void XMLCALL
startElementNS(void * userData,const XML_Char * name,const XML_Char ** atts)2296b2c1e49SXin LI startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
2305bb6a25fSPoul-Henning Kamp int nAtts;
2315bb6a25fSPoul-Henning Kamp int nsi;
2325bb6a25fSPoul-Henning Kamp const XML_Char **p;
2330a48773fSEric van Gyzen FILE *fp = ((XmlwfUserData *)userData)->fp;
2345bb6a25fSPoul-Henning Kamp const XML_Char *sep;
2355bb6a25fSPoul-Henning Kamp puttc(T('<'), fp);
2365bb6a25fSPoul-Henning Kamp
2375bb6a25fSPoul-Henning Kamp sep = tcsrchr(name, NSSEP);
2385bb6a25fSPoul-Henning Kamp if (sep) {
2395bb6a25fSPoul-Henning Kamp fputts(T("n1:"), fp);
2405bb6a25fSPoul-Henning Kamp fputts(sep + 1, fp);
2415bb6a25fSPoul-Henning Kamp fputts(T(" xmlns:n1"), fp);
2425bb6a25fSPoul-Henning Kamp attributeValue(fp, name);
2435bb6a25fSPoul-Henning Kamp nsi = 2;
2446b2c1e49SXin LI } else {
2455bb6a25fSPoul-Henning Kamp fputts(name, fp);
2465bb6a25fSPoul-Henning Kamp nsi = 1;
2475bb6a25fSPoul-Henning Kamp }
2485bb6a25fSPoul-Henning Kamp
2495bb6a25fSPoul-Henning Kamp p = atts;
2505bb6a25fSPoul-Henning Kamp while (*p)
2515bb6a25fSPoul-Henning Kamp ++p;
252220ed979SColeman Kane nAtts = (int)((p - atts) >> 1);
2535bb6a25fSPoul-Henning Kamp if (nAtts > 1)
2545bb6a25fSPoul-Henning Kamp qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
2555bb6a25fSPoul-Henning Kamp while (*atts) {
2565bb6a25fSPoul-Henning Kamp name = *atts++;
2575bb6a25fSPoul-Henning Kamp sep = tcsrchr(name, NSSEP);
2585bb6a25fSPoul-Henning Kamp puttc(T(' '), fp);
2595bb6a25fSPoul-Henning Kamp if (sep) {
2605bb6a25fSPoul-Henning Kamp ftprintf(fp, T("n%d:"), nsi);
2615bb6a25fSPoul-Henning Kamp fputts(sep + 1, fp);
2626b2c1e49SXin LI } else
2635bb6a25fSPoul-Henning Kamp fputts(name, fp);
2645bb6a25fSPoul-Henning Kamp attributeValue(fp, *atts);
2655bb6a25fSPoul-Henning Kamp if (sep) {
2665bb6a25fSPoul-Henning Kamp ftprintf(fp, T(" xmlns:n%d"), nsi++);
2675bb6a25fSPoul-Henning Kamp attributeValue(fp, name);
2685bb6a25fSPoul-Henning Kamp }
2695bb6a25fSPoul-Henning Kamp atts++;
2705bb6a25fSPoul-Henning Kamp }
2715bb6a25fSPoul-Henning Kamp puttc(T('>'), fp);
2725bb6a25fSPoul-Henning Kamp }
2735bb6a25fSPoul-Henning Kamp
274220ed979SColeman Kane static void XMLCALL
endElementNS(void * userData,const XML_Char * name)2756b2c1e49SXin LI endElementNS(void *userData, const XML_Char *name) {
2760a48773fSEric van Gyzen FILE *fp = ((XmlwfUserData *)userData)->fp;
2775bb6a25fSPoul-Henning Kamp const XML_Char *sep;
2785bb6a25fSPoul-Henning Kamp puttc(T('<'), fp);
2795bb6a25fSPoul-Henning Kamp puttc(T('/'), fp);
2805bb6a25fSPoul-Henning Kamp sep = tcsrchr(name, NSSEP);
2815bb6a25fSPoul-Henning Kamp if (sep) {
2825bb6a25fSPoul-Henning Kamp fputts(T("n1:"), fp);
2835bb6a25fSPoul-Henning Kamp fputts(sep + 1, fp);
2846b2c1e49SXin LI } else
2855bb6a25fSPoul-Henning Kamp fputts(name, fp);
2865bb6a25fSPoul-Henning Kamp puttc(T('>'), fp);
2875bb6a25fSPoul-Henning Kamp }
2885bb6a25fSPoul-Henning Kamp
2895bb6a25fSPoul-Henning Kamp #ifndef W3C14N
2905bb6a25fSPoul-Henning Kamp
291220ed979SColeman Kane static void XMLCALL
processingInstruction(void * userData,const XML_Char * target,const XML_Char * data)2925bb6a25fSPoul-Henning Kamp processingInstruction(void *userData, const XML_Char *target,
2936b2c1e49SXin LI const XML_Char *data) {
2940a48773fSEric van Gyzen FILE *fp = ((XmlwfUserData *)userData)->fp;
2955bb6a25fSPoul-Henning Kamp puttc(T('<'), fp);
2965bb6a25fSPoul-Henning Kamp puttc(T('?'), fp);
2975bb6a25fSPoul-Henning Kamp fputts(target, fp);
2985bb6a25fSPoul-Henning Kamp puttc(T(' '), fp);
2995bb6a25fSPoul-Henning Kamp fputts(data, fp);
3005bb6a25fSPoul-Henning Kamp puttc(T('?'), fp);
3015bb6a25fSPoul-Henning Kamp puttc(T('>'), fp);
3025bb6a25fSPoul-Henning Kamp }
3035bb6a25fSPoul-Henning Kamp
3046b2c1e49SXin LI static XML_Char *
xcsdup(const XML_Char * s)3056b2c1e49SXin LI xcsdup(const XML_Char *s) {
3060a48773fSEric van Gyzen XML_Char *result;
3070a48773fSEric van Gyzen int count = 0;
3080a48773fSEric van Gyzen int numBytes;
3090a48773fSEric van Gyzen
3100a48773fSEric van Gyzen /* Get the length of the string, including terminator */
3110a48773fSEric van Gyzen while (s[count++] != 0) {
3120a48773fSEric van Gyzen /* Do nothing */
3130a48773fSEric van Gyzen }
3140a48773fSEric van Gyzen numBytes = count * sizeof(XML_Char);
3150a48773fSEric van Gyzen result = malloc(numBytes);
3160a48773fSEric van Gyzen if (result == NULL)
3170a48773fSEric van Gyzen return NULL;
3180a48773fSEric van Gyzen memcpy(result, s, numBytes);
3190a48773fSEric van Gyzen return result;
3200a48773fSEric van Gyzen }
3210a48773fSEric van Gyzen
3220a48773fSEric van Gyzen static void XMLCALL
startDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * publid,int has_internal_subset)3236b2c1e49SXin LI startDoctypeDecl(void *userData, const XML_Char *doctypeName,
3246b2c1e49SXin LI const XML_Char *sysid, const XML_Char *publid,
3256b2c1e49SXin LI int has_internal_subset) {
3260a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)userData;
3276b2c1e49SXin LI UNUSED_P(sysid);
3286b2c1e49SXin LI UNUSED_P(publid);
3296b2c1e49SXin LI UNUSED_P(has_internal_subset);
3300a48773fSEric van Gyzen data->currentDoctypeName = xcsdup(doctypeName);
3310a48773fSEric van Gyzen }
3320a48773fSEric van Gyzen
3330a48773fSEric van Gyzen static void
freeNotations(XmlwfUserData * data)3346b2c1e49SXin LI freeNotations(XmlwfUserData *data) {
3350a48773fSEric van Gyzen NotationList *notationListHead = data->notationListHead;
3360a48773fSEric van Gyzen
3370a48773fSEric van Gyzen while (notationListHead != NULL) {
3380a48773fSEric van Gyzen NotationList *next = notationListHead->next;
3390a48773fSEric van Gyzen free((void *)notationListHead->notationName);
3400a48773fSEric van Gyzen free((void *)notationListHead->systemId);
3410a48773fSEric van Gyzen free((void *)notationListHead->publicId);
3420a48773fSEric van Gyzen free(notationListHead);
3430a48773fSEric van Gyzen notationListHead = next;
3440a48773fSEric van Gyzen }
3450a48773fSEric van Gyzen data->notationListHead = NULL;
3460a48773fSEric van Gyzen }
3470a48773fSEric van Gyzen
348cc68614dSXin LI static void
cleanupUserData(XmlwfUserData * userData)349cc68614dSXin LI cleanupUserData(XmlwfUserData *userData) {
350cc68614dSXin LI free((void *)userData->currentDoctypeName);
351cc68614dSXin LI userData->currentDoctypeName = NULL;
352cc68614dSXin LI freeNotations(userData);
353cc68614dSXin LI }
354cc68614dSXin LI
3556b2c1e49SXin LI static int
xcscmp(const XML_Char * xs,const XML_Char * xt)3566b2c1e49SXin LI xcscmp(const XML_Char *xs, const XML_Char *xt) {
3570a48773fSEric van Gyzen while (*xs != 0 && *xt != 0) {
3580a48773fSEric van Gyzen if (*xs < *xt)
3590a48773fSEric van Gyzen return -1;
3600a48773fSEric van Gyzen if (*xs > *xt)
3610a48773fSEric van Gyzen return 1;
3620a48773fSEric van Gyzen xs++;
3630a48773fSEric van Gyzen xt++;
3640a48773fSEric van Gyzen }
3650a48773fSEric van Gyzen if (*xs < *xt)
3660a48773fSEric van Gyzen return -1;
3670a48773fSEric van Gyzen if (*xs > *xt)
3680a48773fSEric van Gyzen return 1;
3690a48773fSEric van Gyzen return 0;
3700a48773fSEric van Gyzen }
3710a48773fSEric van Gyzen
3720a48773fSEric van Gyzen static int
notationCmp(const void * a,const void * b)3736b2c1e49SXin LI notationCmp(const void *a, const void *b) {
374*4543ef51SXin LI const NotationList *const n1 = *(const NotationList *const *)a;
375*4543ef51SXin LI const NotationList *const n2 = *(const NotationList *const *)b;
3760a48773fSEric van Gyzen
3770a48773fSEric van Gyzen return xcscmp(n1->notationName, n2->notationName);
3780a48773fSEric van Gyzen }
3790a48773fSEric van Gyzen
3800a48773fSEric van Gyzen static void XMLCALL
endDoctypeDecl(void * userData)3816b2c1e49SXin LI endDoctypeDecl(void *userData) {
3820a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)userData;
3830a48773fSEric van Gyzen NotationList **notations;
3840a48773fSEric van Gyzen int notationCount = 0;
3850a48773fSEric van Gyzen NotationList *p;
3860a48773fSEric van Gyzen int i;
3870a48773fSEric van Gyzen
3880a48773fSEric van Gyzen /* How many notations do we have? */
3890a48773fSEric van Gyzen for (p = data->notationListHead; p != NULL; p = p->next)
3900a48773fSEric van Gyzen notationCount++;
3910a48773fSEric van Gyzen if (notationCount == 0) {
3920a48773fSEric van Gyzen /* Nothing to report */
3930a48773fSEric van Gyzen free((void *)data->currentDoctypeName);
3940a48773fSEric van Gyzen data->currentDoctypeName = NULL;
3950a48773fSEric van Gyzen return;
3960a48773fSEric van Gyzen }
3970a48773fSEric van Gyzen
3980a48773fSEric van Gyzen notations = malloc(notationCount * sizeof(NotationList *));
3990a48773fSEric van Gyzen if (notations == NULL) {
4000a48773fSEric van Gyzen fprintf(stderr, "Unable to sort notations");
4010a48773fSEric van Gyzen freeNotations(data);
4020a48773fSEric van Gyzen return;
4030a48773fSEric van Gyzen }
4040a48773fSEric van Gyzen
4056b2c1e49SXin LI for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
4060a48773fSEric van Gyzen notations[i] = p;
4070a48773fSEric van Gyzen }
4080a48773fSEric van Gyzen qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
4090a48773fSEric van Gyzen
4100a48773fSEric van Gyzen /* Output the DOCTYPE header */
4110a48773fSEric van Gyzen fputts(T("<!DOCTYPE "), data->fp);
4120a48773fSEric van Gyzen fputts(data->currentDoctypeName, data->fp);
4130a48773fSEric van Gyzen fputts(T(" [\n"), data->fp);
4140a48773fSEric van Gyzen
4150a48773fSEric van Gyzen /* Now the NOTATIONs */
4160a48773fSEric van Gyzen for (i = 0; i < notationCount; i++) {
4170a48773fSEric van Gyzen fputts(T("<!NOTATION "), data->fp);
4180a48773fSEric van Gyzen fputts(notations[i]->notationName, data->fp);
4190a48773fSEric van Gyzen if (notations[i]->publicId != NULL) {
4200a48773fSEric van Gyzen fputts(T(" PUBLIC '"), data->fp);
4210a48773fSEric van Gyzen fputts(notations[i]->publicId, data->fp);
4220a48773fSEric van Gyzen puttc(T('\''), data->fp);
4230a48773fSEric van Gyzen if (notations[i]->systemId != NULL) {
4240a48773fSEric van Gyzen puttc(T(' '), data->fp);
4250a48773fSEric van Gyzen puttc(T('\''), data->fp);
4260a48773fSEric van Gyzen fputts(notations[i]->systemId, data->fp);
4270a48773fSEric van Gyzen puttc(T('\''), data->fp);
4280a48773fSEric van Gyzen }
4296b2c1e49SXin LI } else if (notations[i]->systemId != NULL) {
4300a48773fSEric van Gyzen fputts(T(" SYSTEM '"), data->fp);
4310a48773fSEric van Gyzen fputts(notations[i]->systemId, data->fp);
4320a48773fSEric van Gyzen puttc(T('\''), data->fp);
4330a48773fSEric van Gyzen }
4340a48773fSEric van Gyzen puttc(T('>'), data->fp);
4350a48773fSEric van Gyzen puttc(T('\n'), data->fp);
4360a48773fSEric van Gyzen }
4370a48773fSEric van Gyzen
4380a48773fSEric van Gyzen /* Finally end the DOCTYPE */
4390a48773fSEric van Gyzen fputts(T("]>\n"), data->fp);
4400a48773fSEric van Gyzen
4410a48773fSEric van Gyzen free(notations);
4420a48773fSEric van Gyzen freeNotations(data);
4430a48773fSEric van Gyzen free((void *)data->currentDoctypeName);
4440a48773fSEric van Gyzen data->currentDoctypeName = NULL;
4450a48773fSEric van Gyzen }
4460a48773fSEric van Gyzen
4470a48773fSEric van Gyzen static void XMLCALL
notationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)4486b2c1e49SXin LI notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
4496b2c1e49SXin LI const XML_Char *systemId, const XML_Char *publicId) {
4500a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)userData;
4510a48773fSEric van Gyzen NotationList *entry = malloc(sizeof(NotationList));
4520a48773fSEric van Gyzen const char *errorMessage = "Unable to store NOTATION for output\n";
4530a48773fSEric van Gyzen
4546b2c1e49SXin LI UNUSED_P(base);
4550a48773fSEric van Gyzen if (entry == NULL) {
4560a48773fSEric van Gyzen fputs(errorMessage, stderr);
4570a48773fSEric van Gyzen return; /* Nothing we can really do about this */
4580a48773fSEric van Gyzen }
4590a48773fSEric van Gyzen entry->notationName = xcsdup(notationName);
4600a48773fSEric van Gyzen if (entry->notationName == NULL) {
4610a48773fSEric van Gyzen fputs(errorMessage, stderr);
4620a48773fSEric van Gyzen free(entry);
4630a48773fSEric van Gyzen return;
4640a48773fSEric van Gyzen }
4650a48773fSEric van Gyzen if (systemId != NULL) {
4660a48773fSEric van Gyzen entry->systemId = xcsdup(systemId);
4670a48773fSEric van Gyzen if (entry->systemId == NULL) {
4680a48773fSEric van Gyzen fputs(errorMessage, stderr);
4690a48773fSEric van Gyzen free((void *)entry->notationName);
4700a48773fSEric van Gyzen free(entry);
4710a48773fSEric van Gyzen return;
4720a48773fSEric van Gyzen }
4736b2c1e49SXin LI } else {
4740a48773fSEric van Gyzen entry->systemId = NULL;
4750a48773fSEric van Gyzen }
4760a48773fSEric van Gyzen if (publicId != NULL) {
4770a48773fSEric van Gyzen entry->publicId = xcsdup(publicId);
4780a48773fSEric van Gyzen if (entry->publicId == NULL) {
4790a48773fSEric van Gyzen fputs(errorMessage, stderr);
4800a48773fSEric van Gyzen free((void *)entry->systemId); /* Safe if it's NULL */
4810a48773fSEric van Gyzen free((void *)entry->notationName);
4820a48773fSEric van Gyzen free(entry);
4830a48773fSEric van Gyzen return;
4840a48773fSEric van Gyzen }
4856b2c1e49SXin LI } else {
4860a48773fSEric van Gyzen entry->publicId = NULL;
4870a48773fSEric van Gyzen }
4880a48773fSEric van Gyzen
4890a48773fSEric van Gyzen entry->next = data->notationListHead;
4900a48773fSEric van Gyzen data->notationListHead = entry;
4910a48773fSEric van Gyzen }
4920a48773fSEric van Gyzen
4935bb6a25fSPoul-Henning Kamp #endif /* not W3C14N */
4945bb6a25fSPoul-Henning Kamp
495220ed979SColeman Kane static void XMLCALL
defaultCharacterData(void * userData,const XML_Char * s,int len)4966b2c1e49SXin LI defaultCharacterData(void *userData, const XML_Char *s, int len) {
4976b2c1e49SXin LI UNUSED_P(s);
4986b2c1e49SXin LI UNUSED_P(len);
4995bb6a25fSPoul-Henning Kamp XML_DefaultCurrent((XML_Parser)userData);
5005bb6a25fSPoul-Henning Kamp }
5015bb6a25fSPoul-Henning Kamp
502220ed979SColeman Kane static void XMLCALL
defaultStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)5036b2c1e49SXin LI defaultStartElement(void *userData, const XML_Char *name,
5046b2c1e49SXin LI const XML_Char **atts) {
5056b2c1e49SXin LI UNUSED_P(name);
5066b2c1e49SXin LI UNUSED_P(atts);
5075bb6a25fSPoul-Henning Kamp XML_DefaultCurrent((XML_Parser)userData);
5085bb6a25fSPoul-Henning Kamp }
5095bb6a25fSPoul-Henning Kamp
510220ed979SColeman Kane static void XMLCALL
defaultEndElement(void * userData,const XML_Char * name)5116b2c1e49SXin LI defaultEndElement(void *userData, const XML_Char *name) {
5126b2c1e49SXin LI UNUSED_P(name);
5135bb6a25fSPoul-Henning Kamp XML_DefaultCurrent((XML_Parser)userData);
5145bb6a25fSPoul-Henning Kamp }
5155bb6a25fSPoul-Henning Kamp
516220ed979SColeman Kane static void XMLCALL
defaultProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)5176b2c1e49SXin LI defaultProcessingInstruction(void *userData, const XML_Char *target,
5186b2c1e49SXin LI const XML_Char *data) {
5196b2c1e49SXin LI UNUSED_P(target);
5206b2c1e49SXin LI UNUSED_P(data);
5215bb6a25fSPoul-Henning Kamp XML_DefaultCurrent((XML_Parser)userData);
5225bb6a25fSPoul-Henning Kamp }
5235bb6a25fSPoul-Henning Kamp
524220ed979SColeman Kane static void XMLCALL
nopCharacterData(void * userData,const XML_Char * s,int len)5256b2c1e49SXin LI nopCharacterData(void *userData, const XML_Char *s, int len) {
5266b2c1e49SXin LI UNUSED_P(userData);
5276b2c1e49SXin LI UNUSED_P(s);
5286b2c1e49SXin LI UNUSED_P(len);
5295bb6a25fSPoul-Henning Kamp }
5305bb6a25fSPoul-Henning Kamp
531220ed979SColeman Kane static void XMLCALL
nopStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)5326b2c1e49SXin LI nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
5336b2c1e49SXin LI UNUSED_P(userData);
5346b2c1e49SXin LI UNUSED_P(name);
5356b2c1e49SXin LI UNUSED_P(atts);
5365bb6a25fSPoul-Henning Kamp }
5375bb6a25fSPoul-Henning Kamp
538220ed979SColeman Kane static void XMLCALL
nopEndElement(void * userData,const XML_Char * name)5396b2c1e49SXin LI nopEndElement(void *userData, const XML_Char *name) {
5406b2c1e49SXin LI UNUSED_P(userData);
5416b2c1e49SXin LI UNUSED_P(name);
5425bb6a25fSPoul-Henning Kamp }
5435bb6a25fSPoul-Henning Kamp
544220ed979SColeman Kane static void XMLCALL
nopProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)5456b2c1e49SXin LI nopProcessingInstruction(void *userData, const XML_Char *target,
5466b2c1e49SXin LI const XML_Char *data) {
5476b2c1e49SXin LI UNUSED_P(userData);
5486b2c1e49SXin LI UNUSED_P(target);
5496b2c1e49SXin LI UNUSED_P(data);
5505bb6a25fSPoul-Henning Kamp }
5515bb6a25fSPoul-Henning Kamp
552220ed979SColeman Kane static void XMLCALL
markup(void * userData,const XML_Char * s,int len)5536b2c1e49SXin LI markup(void *userData, const XML_Char *s, int len) {
5540a48773fSEric van Gyzen FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
5555bb6a25fSPoul-Henning Kamp for (; len > 0; --len, ++s)
5565bb6a25fSPoul-Henning Kamp puttc(*s, fp);
5575bb6a25fSPoul-Henning Kamp }
5585bb6a25fSPoul-Henning Kamp
5595bb6a25fSPoul-Henning Kamp static void
metaLocation(XML_Parser parser)5606b2c1e49SXin LI metaLocation(XML_Parser parser) {
5615bb6a25fSPoul-Henning Kamp const XML_Char *uri = XML_GetBase(parser);
5620a48773fSEric van Gyzen FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
5635bb6a25fSPoul-Henning Kamp if (uri)
5640a48773fSEric van Gyzen ftprintf(fp, T(" uri=\"%s\""), uri);
5650a48773fSEric van Gyzen ftprintf(fp,
5666b2c1e49SXin LI T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
5676b2c1e49SXin LI T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
5686b2c1e49SXin LI T(XML_FMT_INT_MOD) T("u\""),
5696b2c1e49SXin LI XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
5705bb6a25fSPoul-Henning Kamp XML_GetCurrentLineNumber(parser),
5715bb6a25fSPoul-Henning Kamp XML_GetCurrentColumnNumber(parser));
5725bb6a25fSPoul-Henning Kamp }
5735bb6a25fSPoul-Henning Kamp
5745bb6a25fSPoul-Henning Kamp static void
metaStartDocument(void * userData)5756b2c1e49SXin LI metaStartDocument(void *userData) {
5760a48773fSEric van Gyzen fputts(T("<document>\n"),
5770a48773fSEric van Gyzen ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
5785bb6a25fSPoul-Henning Kamp }
5795bb6a25fSPoul-Henning Kamp
5805bb6a25fSPoul-Henning Kamp static void
metaEndDocument(void * userData)5816b2c1e49SXin LI metaEndDocument(void *userData) {
5820a48773fSEric van Gyzen fputts(T("</document>\n"),
5830a48773fSEric van Gyzen ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
5845bb6a25fSPoul-Henning Kamp }
5855bb6a25fSPoul-Henning Kamp
586220ed979SColeman Kane static void XMLCALL
metaStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)5876b2c1e49SXin LI metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
5885bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
5890a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
5900a48773fSEric van Gyzen FILE *fp = data->fp;
5915bb6a25fSPoul-Henning Kamp const XML_Char **specifiedAttsEnd
5925bb6a25fSPoul-Henning Kamp = atts + XML_GetSpecifiedAttributeCount(parser);
5935bb6a25fSPoul-Henning Kamp const XML_Char **idAttPtr;
5945bb6a25fSPoul-Henning Kamp int idAttIndex = XML_GetIdAttributeIndex(parser);
5955bb6a25fSPoul-Henning Kamp if (idAttIndex < 0)
5965bb6a25fSPoul-Henning Kamp idAttPtr = 0;
5975bb6a25fSPoul-Henning Kamp else
5985bb6a25fSPoul-Henning Kamp idAttPtr = atts + idAttIndex;
5995bb6a25fSPoul-Henning Kamp
6005bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<starttag name=\"%s\""), name);
6015bb6a25fSPoul-Henning Kamp metaLocation(parser);
6025bb6a25fSPoul-Henning Kamp if (*atts) {
6035bb6a25fSPoul-Henning Kamp fputts(T(">\n"), fp);
6045bb6a25fSPoul-Henning Kamp do {
6055bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
6060a48773fSEric van Gyzen characterData(data, atts[1], (int)tcslen(atts[1]));
6075bb6a25fSPoul-Henning Kamp if (atts >= specifiedAttsEnd)
6085bb6a25fSPoul-Henning Kamp fputts(T("\" defaulted=\"yes\"/>\n"), fp);
6095bb6a25fSPoul-Henning Kamp else if (atts == idAttPtr)
6105bb6a25fSPoul-Henning Kamp fputts(T("\" id=\"yes\"/>\n"), fp);
6115bb6a25fSPoul-Henning Kamp else
6125bb6a25fSPoul-Henning Kamp fputts(T("\"/>\n"), fp);
6135bb6a25fSPoul-Henning Kamp } while (*(atts += 2));
6145bb6a25fSPoul-Henning Kamp fputts(T("</starttag>\n"), fp);
6156b2c1e49SXin LI } else
6165bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
6175bb6a25fSPoul-Henning Kamp }
6185bb6a25fSPoul-Henning Kamp
619220ed979SColeman Kane static void XMLCALL
metaEndElement(void * userData,const XML_Char * name)6206b2c1e49SXin LI metaEndElement(void *userData, const XML_Char *name) {
6215bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
6220a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
6230a48773fSEric van Gyzen FILE *fp = data->fp;
6245bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<endtag name=\"%s\""), name);
6255bb6a25fSPoul-Henning Kamp metaLocation(parser);
6265bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
6275bb6a25fSPoul-Henning Kamp }
6285bb6a25fSPoul-Henning Kamp
629220ed979SColeman Kane static void XMLCALL
metaProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)6305bb6a25fSPoul-Henning Kamp metaProcessingInstruction(void *userData, const XML_Char *target,
6316b2c1e49SXin LI const XML_Char *data) {
6325bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
6330a48773fSEric van Gyzen XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
6340a48773fSEric van Gyzen FILE *fp = usrData->fp;
6355bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
6360a48773fSEric van Gyzen characterData(usrData, data, (int)tcslen(data));
6375bb6a25fSPoul-Henning Kamp puttc(T('"'), fp);
6385bb6a25fSPoul-Henning Kamp metaLocation(parser);
6395bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
6405bb6a25fSPoul-Henning Kamp }
6415bb6a25fSPoul-Henning Kamp
642220ed979SColeman Kane static void XMLCALL
metaComment(void * userData,const XML_Char * data)6436b2c1e49SXin LI metaComment(void *userData, const XML_Char *data) {
6445bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
6450a48773fSEric van Gyzen XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
6460a48773fSEric van Gyzen FILE *fp = usrData->fp;
6475bb6a25fSPoul-Henning Kamp fputts(T("<comment data=\""), fp);
6480a48773fSEric van Gyzen characterData(usrData, data, (int)tcslen(data));
6495bb6a25fSPoul-Henning Kamp puttc(T('"'), fp);
6505bb6a25fSPoul-Henning Kamp metaLocation(parser);
6515bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
6525bb6a25fSPoul-Henning Kamp }
6535bb6a25fSPoul-Henning Kamp
654220ed979SColeman Kane static void XMLCALL
metaStartCdataSection(void * userData)6556b2c1e49SXin LI metaStartCdataSection(void *userData) {
6565bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
6570a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
6580a48773fSEric van Gyzen FILE *fp = data->fp;
6595bb6a25fSPoul-Henning Kamp fputts(T("<startcdata"), fp);
6605bb6a25fSPoul-Henning Kamp metaLocation(parser);
6615bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
6625bb6a25fSPoul-Henning Kamp }
6635bb6a25fSPoul-Henning Kamp
664220ed979SColeman Kane static void XMLCALL
metaEndCdataSection(void * userData)6656b2c1e49SXin LI metaEndCdataSection(void *userData) {
6665bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
6670a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
6680a48773fSEric van Gyzen FILE *fp = data->fp;
6695bb6a25fSPoul-Henning Kamp fputts(T("<endcdata"), fp);
6705bb6a25fSPoul-Henning Kamp metaLocation(parser);
6715bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
6725bb6a25fSPoul-Henning Kamp }
6735bb6a25fSPoul-Henning Kamp
674220ed979SColeman Kane static void XMLCALL
metaCharacterData(void * userData,const XML_Char * s,int len)6756b2c1e49SXin LI metaCharacterData(void *userData, const XML_Char *s, int len) {
6765bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
6770a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
6780a48773fSEric van Gyzen FILE *fp = data->fp;
6795bb6a25fSPoul-Henning Kamp fputts(T("<chars str=\""), fp);
6800a48773fSEric van Gyzen characterData(data, s, len);
6815bb6a25fSPoul-Henning Kamp puttc(T('"'), fp);
6825bb6a25fSPoul-Henning Kamp metaLocation(parser);
6835bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
6845bb6a25fSPoul-Henning Kamp }
6855bb6a25fSPoul-Henning Kamp
686220ed979SColeman Kane static void XMLCALL
metaStartDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)6876b2c1e49SXin LI metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
6886b2c1e49SXin LI const XML_Char *sysid, const XML_Char *pubid,
6896b2c1e49SXin LI int has_internal_subset) {
6905bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
6910a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
6920a48773fSEric van Gyzen FILE *fp = data->fp;
6936b2c1e49SXin LI UNUSED_P(sysid);
6946b2c1e49SXin LI UNUSED_P(pubid);
6956b2c1e49SXin LI UNUSED_P(has_internal_subset);
6965bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
6975bb6a25fSPoul-Henning Kamp metaLocation(parser);
6985bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
6995bb6a25fSPoul-Henning Kamp }
7005bb6a25fSPoul-Henning Kamp
701220ed979SColeman Kane static void XMLCALL
metaEndDoctypeDecl(void * userData)7026b2c1e49SXin LI metaEndDoctypeDecl(void *userData) {
7035bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
7040a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
7050a48773fSEric van Gyzen FILE *fp = data->fp;
7065bb6a25fSPoul-Henning Kamp fputts(T("<enddoctype"), fp);
7075bb6a25fSPoul-Henning Kamp metaLocation(parser);
7085bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
7095bb6a25fSPoul-Henning Kamp }
7105bb6a25fSPoul-Henning Kamp
711220ed979SColeman Kane static void XMLCALL
metaNotationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)7126b2c1e49SXin LI metaNotationDecl(void *userData, const XML_Char *notationName,
7136b2c1e49SXin LI const XML_Char *base, const XML_Char *systemId,
7146b2c1e49SXin LI const XML_Char *publicId) {
7155bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
7160a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
7170a48773fSEric van Gyzen FILE *fp = data->fp;
7186b2c1e49SXin LI UNUSED_P(base);
7195bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<notation name=\"%s\""), notationName);
7205bb6a25fSPoul-Henning Kamp if (publicId)
7215bb6a25fSPoul-Henning Kamp ftprintf(fp, T(" public=\"%s\""), publicId);
7225bb6a25fSPoul-Henning Kamp if (systemId) {
7235bb6a25fSPoul-Henning Kamp fputts(T(" system=\""), fp);
7240a48773fSEric van Gyzen characterData(data, systemId, (int)tcslen(systemId));
7255bb6a25fSPoul-Henning Kamp puttc(T('"'), fp);
7265bb6a25fSPoul-Henning Kamp }
7275bb6a25fSPoul-Henning Kamp metaLocation(parser);
7285bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
7295bb6a25fSPoul-Henning Kamp }
7305bb6a25fSPoul-Henning Kamp
731220ed979SColeman Kane static void XMLCALL
metaEntityDecl(void * userData,const XML_Char * entityName,int is_param,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)7326b2c1e49SXin LI metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
7336b2c1e49SXin LI const XML_Char *value, int value_length, const XML_Char *base,
7346b2c1e49SXin LI const XML_Char *systemId, const XML_Char *publicId,
7356b2c1e49SXin LI const XML_Char *notationName) {
7365bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
7370a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
7380a48773fSEric van Gyzen FILE *fp = data->fp;
7395bb6a25fSPoul-Henning Kamp
7406b2c1e49SXin LI UNUSED_P(is_param);
7416b2c1e49SXin LI UNUSED_P(base);
7425bb6a25fSPoul-Henning Kamp if (value) {
7435bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<entity name=\"%s\""), entityName);
7445bb6a25fSPoul-Henning Kamp metaLocation(parser);
7455bb6a25fSPoul-Henning Kamp puttc(T('>'), fp);
7460a48773fSEric van Gyzen characterData(data, value, value_length);
7475bb6a25fSPoul-Henning Kamp fputts(T("</entity/>\n"), fp);
7486b2c1e49SXin LI } else if (notationName) {
7495bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<entity name=\"%s\""), entityName);
7505bb6a25fSPoul-Henning Kamp if (publicId)
7515bb6a25fSPoul-Henning Kamp ftprintf(fp, T(" public=\"%s\""), publicId);
7525bb6a25fSPoul-Henning Kamp fputts(T(" system=\""), fp);
7530a48773fSEric van Gyzen characterData(data, systemId, (int)tcslen(systemId));
7545bb6a25fSPoul-Henning Kamp puttc(T('"'), fp);
7555bb6a25fSPoul-Henning Kamp ftprintf(fp, T(" notation=\"%s\""), notationName);
7565bb6a25fSPoul-Henning Kamp metaLocation(parser);
7575bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
7586b2c1e49SXin LI } else {
7595bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<entity name=\"%s\""), entityName);
7605bb6a25fSPoul-Henning Kamp if (publicId)
7615bb6a25fSPoul-Henning Kamp ftprintf(fp, T(" public=\"%s\""), publicId);
7625bb6a25fSPoul-Henning Kamp fputts(T(" system=\""), fp);
7630a48773fSEric van Gyzen characterData(data, systemId, (int)tcslen(systemId));
7645bb6a25fSPoul-Henning Kamp puttc(T('"'), fp);
7655bb6a25fSPoul-Henning Kamp metaLocation(parser);
7665bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
7675bb6a25fSPoul-Henning Kamp }
7685bb6a25fSPoul-Henning Kamp }
7695bb6a25fSPoul-Henning Kamp
770220ed979SColeman Kane static void XMLCALL
metaStartNamespaceDecl(void * userData,const XML_Char * prefix,const XML_Char * uri)7716b2c1e49SXin LI metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
7726b2c1e49SXin LI const XML_Char *uri) {
7735bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
7740a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
7750a48773fSEric van Gyzen FILE *fp = data->fp;
7765bb6a25fSPoul-Henning Kamp fputts(T("<startns"), fp);
7775bb6a25fSPoul-Henning Kamp if (prefix)
7785bb6a25fSPoul-Henning Kamp ftprintf(fp, T(" prefix=\"%s\""), prefix);
7795bb6a25fSPoul-Henning Kamp if (uri) {
7805bb6a25fSPoul-Henning Kamp fputts(T(" ns=\""), fp);
7810a48773fSEric van Gyzen characterData(data, uri, (int)tcslen(uri));
7825bb6a25fSPoul-Henning Kamp fputts(T("\"/>\n"), fp);
7836b2c1e49SXin LI } else
7845bb6a25fSPoul-Henning Kamp fputts(T("/>\n"), fp);
7855bb6a25fSPoul-Henning Kamp }
7865bb6a25fSPoul-Henning Kamp
787220ed979SColeman Kane static void XMLCALL
metaEndNamespaceDecl(void * userData,const XML_Char * prefix)7886b2c1e49SXin LI metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
7895bb6a25fSPoul-Henning Kamp XML_Parser parser = (XML_Parser)userData;
7900a48773fSEric van Gyzen XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
7910a48773fSEric van Gyzen FILE *fp = data->fp;
7925bb6a25fSPoul-Henning Kamp if (! prefix)
7935bb6a25fSPoul-Henning Kamp fputts(T("<endns/>\n"), fp);
7945bb6a25fSPoul-Henning Kamp else
7955bb6a25fSPoul-Henning Kamp ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
7965bb6a25fSPoul-Henning Kamp }
7975bb6a25fSPoul-Henning Kamp
798220ed979SColeman Kane static int XMLCALL
unknownEncodingConvert(void * data,const char * p)7996b2c1e49SXin LI unknownEncodingConvert(void *data, const char *p) {
8005bb6a25fSPoul-Henning Kamp return codepageConvert(*(int *)data, p);
8015bb6a25fSPoul-Henning Kamp }
8025bb6a25fSPoul-Henning Kamp
803220ed979SColeman Kane static int XMLCALL
unknownEncoding(void * userData,const XML_Char * name,XML_Encoding * info)8046b2c1e49SXin LI unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
8055bb6a25fSPoul-Henning Kamp int cp;
8065bb6a25fSPoul-Henning Kamp static const XML_Char prefixL[] = T("windows-");
8075bb6a25fSPoul-Henning Kamp static const XML_Char prefixU[] = T("WINDOWS-");
8085bb6a25fSPoul-Henning Kamp int i;
8095bb6a25fSPoul-Henning Kamp
8106b2c1e49SXin LI UNUSED_P(userData);
8115bb6a25fSPoul-Henning Kamp for (i = 0; prefixU[i]; i++)
8125bb6a25fSPoul-Henning Kamp if (name[i] != prefixU[i] && name[i] != prefixL[i])
8135bb6a25fSPoul-Henning Kamp return 0;
8145bb6a25fSPoul-Henning Kamp
8155bb6a25fSPoul-Henning Kamp cp = 0;
8165bb6a25fSPoul-Henning Kamp for (; name[i]; i++) {
8175bb6a25fSPoul-Henning Kamp static const XML_Char digits[] = T("0123456789");
8185bb6a25fSPoul-Henning Kamp const XML_Char *s = tcschr(digits, name[i]);
8195bb6a25fSPoul-Henning Kamp if (! s)
8205bb6a25fSPoul-Henning Kamp return 0;
8215bb6a25fSPoul-Henning Kamp cp *= 10;
822220ed979SColeman Kane cp += (int)(s - digits);
8235bb6a25fSPoul-Henning Kamp if (cp >= 0x10000)
8245bb6a25fSPoul-Henning Kamp return 0;
8255bb6a25fSPoul-Henning Kamp }
8265bb6a25fSPoul-Henning Kamp if (! codepageMap(cp, info->map))
8275bb6a25fSPoul-Henning Kamp return 0;
8285bb6a25fSPoul-Henning Kamp info->convert = unknownEncodingConvert;
8295bb6a25fSPoul-Henning Kamp /* We could just cast the code page integer to a void *,
8305bb6a25fSPoul-Henning Kamp and avoid the use of release. */
8315bb6a25fSPoul-Henning Kamp info->release = free;
8325bb6a25fSPoul-Henning Kamp info->data = malloc(sizeof(int));
8335bb6a25fSPoul-Henning Kamp if (! info->data)
8345bb6a25fSPoul-Henning Kamp return 0;
8355bb6a25fSPoul-Henning Kamp *(int *)info->data = cp;
8365bb6a25fSPoul-Henning Kamp return 1;
8375bb6a25fSPoul-Henning Kamp }
8385bb6a25fSPoul-Henning Kamp
839220ed979SColeman Kane static int XMLCALL
notStandalone(void * userData)8406b2c1e49SXin LI notStandalone(void *userData) {
8416b2c1e49SXin LI UNUSED_P(userData);
8425bb6a25fSPoul-Henning Kamp return 0;
8435bb6a25fSPoul-Henning Kamp }
8445bb6a25fSPoul-Henning Kamp
8455bb6a25fSPoul-Henning Kamp static void
showVersion(XML_Char * prog)8466b2c1e49SXin LI showVersion(XML_Char *prog) {
8475bb6a25fSPoul-Henning Kamp XML_Char *s = prog;
8485bb6a25fSPoul-Henning Kamp XML_Char ch;
8495bb6a25fSPoul-Henning Kamp const XML_Feature *features = XML_GetFeatureList();
8505bb6a25fSPoul-Henning Kamp while ((ch = *s) != 0) {
8515bb6a25fSPoul-Henning Kamp if (ch == '/'
8520a48773fSEric van Gyzen #if defined(_WIN32)
8535bb6a25fSPoul-Henning Kamp || ch == '\\'
8545bb6a25fSPoul-Henning Kamp #endif
8555bb6a25fSPoul-Henning Kamp )
8565bb6a25fSPoul-Henning Kamp prog = s + 1;
8575bb6a25fSPoul-Henning Kamp ++s;
8585bb6a25fSPoul-Henning Kamp }
8595bb6a25fSPoul-Henning Kamp ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
8605bb6a25fSPoul-Henning Kamp if (features != NULL && features[0].feature != XML_FEATURE_END) {
8615bb6a25fSPoul-Henning Kamp int i = 1;
8625bb6a25fSPoul-Henning Kamp ftprintf(stdout, T("%s"), features[0].name);
8635bb6a25fSPoul-Henning Kamp if (features[0].value)
8645bb6a25fSPoul-Henning Kamp ftprintf(stdout, T("=%ld"), features[0].value);
8655bb6a25fSPoul-Henning Kamp while (features[i].feature != XML_FEATURE_END) {
8665bb6a25fSPoul-Henning Kamp ftprintf(stdout, T(", %s"), features[i].name);
8675bb6a25fSPoul-Henning Kamp if (features[i].value)
8685bb6a25fSPoul-Henning Kamp ftprintf(stdout, T("=%ld"), features[i].value);
8695bb6a25fSPoul-Henning Kamp ++i;
8705bb6a25fSPoul-Henning Kamp }
8715bb6a25fSPoul-Henning Kamp ftprintf(stdout, T("\n"));
8725bb6a25fSPoul-Henning Kamp }
8735bb6a25fSPoul-Henning Kamp }
8745bb6a25fSPoul-Henning Kamp
875*4543ef51SXin LI #if defined(__GNUC__)
876*4543ef51SXin LI __attribute__((noreturn))
877*4543ef51SXin LI #endif
8785bb6a25fSPoul-Henning Kamp static void
usage(const XML_Char * prog,int rc)8796b2c1e49SXin LI usage(const XML_Char *prog, int rc) {
8806b2c1e49SXin LI ftprintf(
8816b2c1e49SXin LI stderr,
8826b2c1e49SXin LI /* Generated with:
8836b2c1e49SXin LI * $ xmlwf/xmlwf_helpgen.sh
884cc68614dSXin LI * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
885cc68614dSXin LI * xmlwf/xmlwf_helpgen.sh in here.
8866b2c1e49SXin LI */
8876b2c1e49SXin LI /* clang-format off */
888cc68614dSXin LI T("usage:\n")
889cc68614dSXin LI T(" %s [OPTIONS] [FILE ...]\n")
890*4543ef51SXin LI T(" %s -h|--help\n")
891*4543ef51SXin LI T(" %s -v|--version\n")
8926b2c1e49SXin LI T("\n")
8936b2c1e49SXin LI T("xmlwf - Determines if an XML document is well-formed\n")
8946b2c1e49SXin LI T("\n")
8956b2c1e49SXin LI T("positional arguments:\n")
896cc68614dSXin LI T(" FILE file to process (default: STDIN)\n")
8976b2c1e49SXin LI T("\n")
8986b2c1e49SXin LI T("input control arguments:\n")
8996b2c1e49SXin LI T(" -s print an error if the document is not [s]tandalone\n")
9006b2c1e49SXin LI T(" -n enable [n]amespace processing\n")
901*4543ef51SXin LI T(" -p enable processing of external DTDs and [p]arameter entities\n")
9026b2c1e49SXin LI T(" -x enable processing of e[x]ternal entities\n")
9036b2c1e49SXin LI T(" -e ENCODING override any in-document [e]ncoding declaration\n")
9046b2c1e49SXin LI T(" -w enable support for [W]indows code pages\n")
905*4543ef51SXin LI T(" -r disable memory-mapping and use [r]ead calls instead\n")
906*4543ef51SXin LI T(" -g BYTES buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
907cc68614dSXin LI T(" -k when processing multiple files, [k]eep processing after first file with error\n")
9086b2c1e49SXin LI T("\n")
9096b2c1e49SXin LI T("output control arguments:\n")
9106b2c1e49SXin LI T(" -d DIRECTORY output [d]estination directory\n")
9116b2c1e49SXin LI T(" -c write a [c]opy of input XML, not canonical XML\n")
9126b2c1e49SXin LI T(" -m write [m]eta XML, not canonical XML\n")
9136b2c1e49SXin LI T(" -t write no XML output for [t]iming of plain parsing\n")
9146b2c1e49SXin LI T(" -N enable adding doctype and [n]otation declarations\n")
9156b2c1e49SXin LI T("\n")
916cc68614dSXin LI T("billion laughs attack protection:\n")
917cc68614dSXin LI T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
918cc68614dSXin LI T("\n")
919cc68614dSXin LI T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
920cc68614dSXin LI T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n")
921cc68614dSXin LI T("\n")
922*4543ef51SXin LI T("reparse deferral:\n")
923*4543ef51SXin LI T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
924*4543ef51SXin LI T("\n")
9256b2c1e49SXin LI T("info arguments:\n")
926*4543ef51SXin LI T(" -h, --help show this [h]elp message and exit\n")
927*4543ef51SXin LI T(" -v, --version show program's [v]ersion number and exit\n")
9286b2c1e49SXin LI T("\n")
929cc68614dSXin LI T("exit status:\n")
930cc68614dSXin LI T(" 0 the input files are well-formed and the output (if requested) was written successfully\n")
931cc68614dSXin LI T(" 1 could not allocate data structures, signals a serious problem with execution environment\n")
932cc68614dSXin LI T(" 2 one or more input files were not well-formed\n")
933cc68614dSXin LI T(" 3 could not create an output file\n")
934cc68614dSXin LI T(" 4 command-line argument error\n")
935cc68614dSXin LI T("\n")
936cc68614dSXin LI T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
937*4543ef51SXin LI T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
9386b2c1e49SXin LI , /* clang-format on */
939cc68614dSXin LI prog, prog, prog);
9405bb6a25fSPoul-Henning Kamp exit(rc);
9415bb6a25fSPoul-Henning Kamp }
9425bb6a25fSPoul-Henning Kamp
#if defined(XML_UNICODE) && defined(__MINGW32__)
/* Declared up front only to silence the compiler's warning about a missing
   prototype. */
int wmain(int argc, XML_Char **argv);
#endif
9470a48773fSEric van Gyzen
/* Fetches the value of a command-line option that takes an argument.

   On entry argv[i][j] is the option letter.  If more characters follow it
   in the same word ("-eUTF-8"), the value is the rest of that word;
   otherwise ("-e UTF-8") the value is the next word, and usage() is called
   with XMLWF_EXIT_USAGE_ERROR when there is no next word.  The value is
   stored into constCharStarTarget; afterwards i indexes the word following
   the value and j is reset to 0.

   i, j and constCharStarTarget must be modifiable lvalues; i is evaluated
   more than once, so pass plain variables.

   The body is wrapped in do { } while (0) so that "MACRO(...);" is exactly
   one statement and can be used in an unbraced if/else, and every macro
   argument is parenthesized. */
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j)            \
  do {                                                                         \
    if ((argv)[(i)][(j) + 1] == T('\0')) {                                     \
      if (++(i) == (argc)) {                                                   \
        usage((argv)[0], XMLWF_EXIT_USAGE_ERROR);                              \
        /* usage called exit(..), never gets here */                           \
      }                                                                        \
      (constCharStarTarget) = (argv)[(i)];                                     \
    } else {                                                                   \
      (constCharStarTarget) = (argv)[(i)] + (j) + 1;                           \
    }                                                                          \
    (i)++;                                                                     \
    (j) = 0;                                                                   \
  } while (0)
962cc68614dSXin LI
9635bb6a25fSPoul-Henning Kamp int
tmain(int argc,XML_Char ** argv)9646b2c1e49SXin LI tmain(int argc, XML_Char **argv) {
9655bb6a25fSPoul-Henning Kamp int i, j;
9665bb6a25fSPoul-Henning Kamp const XML_Char *outputDir = NULL;
9675bb6a25fSPoul-Henning Kamp const XML_Char *encoding = NULL;
9685bb6a25fSPoul-Henning Kamp unsigned processFlags = XML_MAP_FILE;
9695bb6a25fSPoul-Henning Kamp int windowsCodePages = 0;
9705bb6a25fSPoul-Henning Kamp int outputType = 0;
9715bb6a25fSPoul-Henning Kamp int useNamespaces = 0;
9725bb6a25fSPoul-Henning Kamp int requireStandalone = 0;
9730a48773fSEric van Gyzen int requiresNotations = 0;
974cc68614dSXin LI int continueOnError = 0;
975cc68614dSXin LI
976cc68614dSXin LI float attackMaximumAmplification = -1.0f; /* signaling "not set" */
977*4543ef51SXin LI unsigned long long attackThresholdBytes = 0;
978cc68614dSXin LI XML_Bool attackThresholdGiven = XML_FALSE;
979cc68614dSXin LI
980*4543ef51SXin LI XML_Bool disableDeferral = XML_FALSE;
981*4543ef51SXin LI
982cc68614dSXin LI int exitCode = XMLWF_EXIT_SUCCESS;
9836b2c1e49SXin LI enum XML_ParamEntityParsing paramEntityParsing
9846b2c1e49SXin LI = XML_PARAM_ENTITY_PARSING_NEVER;
9855bb6a25fSPoul-Henning Kamp int useStdin = 0;
9860a48773fSEric van Gyzen XmlwfUserData userData = {NULL, NULL, NULL};
9875bb6a25fSPoul-Henning Kamp
9885bb6a25fSPoul-Henning Kamp #ifdef _MSC_VER
9895bb6a25fSPoul-Henning Kamp _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
9905bb6a25fSPoul-Henning Kamp #endif
9915bb6a25fSPoul-Henning Kamp
9925bb6a25fSPoul-Henning Kamp i = 1;
9935bb6a25fSPoul-Henning Kamp j = 0;
9945bb6a25fSPoul-Henning Kamp while (i < argc) {
9955bb6a25fSPoul-Henning Kamp if (j == 0) {
9965bb6a25fSPoul-Henning Kamp if (argv[i][0] != T('-'))
9975bb6a25fSPoul-Henning Kamp break;
998*4543ef51SXin LI if (argv[i][1] == T('-')) {
999*4543ef51SXin LI if (argv[i][2] == T('\0')) {
10005bb6a25fSPoul-Henning Kamp i++;
10015bb6a25fSPoul-Henning Kamp break;
1002*4543ef51SXin LI } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
1003*4543ef51SXin LI usage(argv[0], XMLWF_EXIT_SUCCESS);
1004*4543ef51SXin LI // usage called exit(..), never gets here
1005*4543ef51SXin LI } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
1006*4543ef51SXin LI showVersion(argv[0]);
1007*4543ef51SXin LI return XMLWF_EXIT_SUCCESS;
1008*4543ef51SXin LI }
10095bb6a25fSPoul-Henning Kamp }
10105bb6a25fSPoul-Henning Kamp j++;
10115bb6a25fSPoul-Henning Kamp }
10125bb6a25fSPoul-Henning Kamp switch (argv[i][j]) {
10135bb6a25fSPoul-Henning Kamp case T('r'):
10145bb6a25fSPoul-Henning Kamp processFlags &= ~XML_MAP_FILE;
10155bb6a25fSPoul-Henning Kamp j++;
10165bb6a25fSPoul-Henning Kamp break;
10175bb6a25fSPoul-Henning Kamp case T('s'):
10185bb6a25fSPoul-Henning Kamp requireStandalone = 1;
10195bb6a25fSPoul-Henning Kamp j++;
10205bb6a25fSPoul-Henning Kamp break;
10215bb6a25fSPoul-Henning Kamp case T('n'):
10225bb6a25fSPoul-Henning Kamp useNamespaces = 1;
10235bb6a25fSPoul-Henning Kamp j++;
10245bb6a25fSPoul-Henning Kamp break;
10255bb6a25fSPoul-Henning Kamp case T('p'):
10265bb6a25fSPoul-Henning Kamp paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
10275bb6a25fSPoul-Henning Kamp /* fall through */
10285bb6a25fSPoul-Henning Kamp case T('x'):
10295bb6a25fSPoul-Henning Kamp processFlags |= XML_EXTERNAL_ENTITIES;
10305bb6a25fSPoul-Henning Kamp j++;
10315bb6a25fSPoul-Henning Kamp break;
10325bb6a25fSPoul-Henning Kamp case T('w'):
10335bb6a25fSPoul-Henning Kamp windowsCodePages = 1;
10345bb6a25fSPoul-Henning Kamp j++;
10355bb6a25fSPoul-Henning Kamp break;
10365bb6a25fSPoul-Henning Kamp case T('m'):
10375bb6a25fSPoul-Henning Kamp outputType = 'm';
10385bb6a25fSPoul-Henning Kamp j++;
10395bb6a25fSPoul-Henning Kamp break;
10405bb6a25fSPoul-Henning Kamp case T('c'):
10415bb6a25fSPoul-Henning Kamp outputType = 'c';
10425bb6a25fSPoul-Henning Kamp useNamespaces = 0;
10435bb6a25fSPoul-Henning Kamp j++;
10445bb6a25fSPoul-Henning Kamp break;
10455bb6a25fSPoul-Henning Kamp case T('t'):
10465bb6a25fSPoul-Henning Kamp outputType = 't';
10475bb6a25fSPoul-Henning Kamp j++;
10485bb6a25fSPoul-Henning Kamp break;
10490a48773fSEric van Gyzen case T('N'):
10500a48773fSEric van Gyzen requiresNotations = 1;
10510a48773fSEric van Gyzen j++;
10520a48773fSEric van Gyzen break;
10535bb6a25fSPoul-Henning Kamp case T('d'):
1054cc68614dSXin LI XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
10555bb6a25fSPoul-Henning Kamp break;
10565bb6a25fSPoul-Henning Kamp case T('e'):
1057cc68614dSXin LI XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
10585bb6a25fSPoul-Henning Kamp break;
10595bb6a25fSPoul-Henning Kamp case T('h'):
1060cc68614dSXin LI usage(argv[0], XMLWF_EXIT_SUCCESS);
1061*4543ef51SXin LI // usage called exit(..), never gets here
10625bb6a25fSPoul-Henning Kamp case T('v'):
10635bb6a25fSPoul-Henning Kamp showVersion(argv[0]);
1064*4543ef51SXin LI return XMLWF_EXIT_SUCCESS;
1065*4543ef51SXin LI case T('g'): {
1066*4543ef51SXin LI const XML_Char *valueText = NULL;
1067*4543ef51SXin LI XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1068*4543ef51SXin LI
1069*4543ef51SXin LI errno = 0;
1070*4543ef51SXin LI XML_Char *afterValueText = (XML_Char *)valueText;
1071*4543ef51SXin LI const long long read_size_bytes_candidate
1072*4543ef51SXin LI = tcstoull(valueText, &afterValueText, 10);
1073*4543ef51SXin LI if ((errno != 0) || (afterValueText[0] != T('\0'))
1074*4543ef51SXin LI || (read_size_bytes_candidate < 1)
1075*4543ef51SXin LI || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
1076*4543ef51SXin LI // This prevents tperror(..) from reporting misleading "[..]: Success"
1077*4543ef51SXin LI errno = ERANGE;
1078*4543ef51SXin LI tperror(T("invalid buffer size") T(
1079*4543ef51SXin LI " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
1080*4543ef51SXin LI exit(XMLWF_EXIT_USAGE_ERROR);
1081*4543ef51SXin LI }
1082*4543ef51SXin LI g_read_size_bytes = (int)read_size_bytes_candidate;
1083*4543ef51SXin LI break;
1084*4543ef51SXin LI }
1085cc68614dSXin LI case T('k'):
1086cc68614dSXin LI continueOnError = 1;
1087cc68614dSXin LI j++;
1088cc68614dSXin LI break;
1089cc68614dSXin LI case T('a'): {
1090cc68614dSXin LI const XML_Char *valueText = NULL;
1091cc68614dSXin LI XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1092cc68614dSXin LI
1093cc68614dSXin LI errno = 0;
1094*4543ef51SXin LI XML_Char *afterValueText = NULL;
1095cc68614dSXin LI attackMaximumAmplification = tcstof(valueText, &afterValueText);
1096cc68614dSXin LI if ((errno != 0) || (afterValueText[0] != T('\0'))
1097cc68614dSXin LI || isnan(attackMaximumAmplification)
1098cc68614dSXin LI || (attackMaximumAmplification < 1.0f)) {
1099cc68614dSXin LI // This prevents tperror(..) from reporting misleading "[..]: Success"
1100cc68614dSXin LI errno = ERANGE;
1101cc68614dSXin LI tperror(T("invalid amplification limit") T(
1102cc68614dSXin LI " (needs a floating point number greater or equal than 1.0)"));
1103cc68614dSXin LI exit(XMLWF_EXIT_USAGE_ERROR);
1104cc68614dSXin LI }
1105*4543ef51SXin LI #if XML_GE == 0
1106*4543ef51SXin LI ftprintf(stderr,
1107*4543ef51SXin LI T("Warning: Given amplification limit ignored")
1108*4543ef51SXin LI T(", xmlwf has been compiled without DTD/GE support.\n"));
1109cc68614dSXin LI #endif
1110cc68614dSXin LI break;
1111cc68614dSXin LI }
1112cc68614dSXin LI case T('b'): {
1113cc68614dSXin LI const XML_Char *valueText = NULL;
1114cc68614dSXin LI XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1115cc68614dSXin LI
1116cc68614dSXin LI errno = 0;
1117cc68614dSXin LI XML_Char *afterValueText = (XML_Char *)valueText;
1118cc68614dSXin LI attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
1119cc68614dSXin LI if ((errno != 0) || (afterValueText[0] != T('\0'))) {
1120cc68614dSXin LI // This prevents tperror(..) from reporting misleading "[..]: Success"
1121cc68614dSXin LI errno = ERANGE;
1122cc68614dSXin LI tperror(T("invalid ignore threshold")
1123cc68614dSXin LI T(" (needs an integer from 0 to 2^64-1)"));
1124cc68614dSXin LI exit(XMLWF_EXIT_USAGE_ERROR);
1125cc68614dSXin LI }
1126cc68614dSXin LI attackThresholdGiven = XML_TRUE;
1127*4543ef51SXin LI #if XML_GE == 0
1128*4543ef51SXin LI ftprintf(stderr,
1129*4543ef51SXin LI T("Warning: Given attack threshold ignored")
1130*4543ef51SXin LI T(", xmlwf has been compiled without DTD/GE support.\n"));
1131cc68614dSXin LI #endif
1132cc68614dSXin LI break;
1133cc68614dSXin LI }
1134*4543ef51SXin LI case T('q'): {
1135*4543ef51SXin LI disableDeferral = XML_TRUE;
1136*4543ef51SXin LI j++;
1137*4543ef51SXin LI break;
1138*4543ef51SXin LI }
11395bb6a25fSPoul-Henning Kamp case T('\0'):
11405bb6a25fSPoul-Henning Kamp if (j > 1) {
11415bb6a25fSPoul-Henning Kamp i++;
11425bb6a25fSPoul-Henning Kamp j = 0;
11435bb6a25fSPoul-Henning Kamp break;
11445bb6a25fSPoul-Henning Kamp }
11455bb6a25fSPoul-Henning Kamp /* fall through */
11465bb6a25fSPoul-Henning Kamp default:
1147cc68614dSXin LI usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
1148*4543ef51SXin LI // usage called exit(..), never gets here
11495bb6a25fSPoul-Henning Kamp }
11505bb6a25fSPoul-Henning Kamp }
11515bb6a25fSPoul-Henning Kamp if (i == argc) {
11525bb6a25fSPoul-Henning Kamp useStdin = 1;
11535bb6a25fSPoul-Henning Kamp processFlags &= ~XML_MAP_FILE;
11545bb6a25fSPoul-Henning Kamp i--;
11555bb6a25fSPoul-Henning Kamp }
11565bb6a25fSPoul-Henning Kamp for (; i < argc; i++) {
11575bb6a25fSPoul-Henning Kamp XML_Char *outName = 0;
11585bb6a25fSPoul-Henning Kamp int result;
11595bb6a25fSPoul-Henning Kamp XML_Parser parser;
11605bb6a25fSPoul-Henning Kamp if (useNamespaces)
11615bb6a25fSPoul-Henning Kamp parser = XML_ParserCreateNS(encoding, NSSEP);
11625bb6a25fSPoul-Henning Kamp else
11635bb6a25fSPoul-Henning Kamp parser = XML_ParserCreate(encoding);
1164be8aff81SXin LI
1165be8aff81SXin LI if (! parser) {
11660a48773fSEric van Gyzen tperror(T("Could not instantiate parser"));
1167cc68614dSXin LI exit(XMLWF_EXIT_INTERNAL_ERROR);
1168cc68614dSXin LI }
1169cc68614dSXin LI
1170cc68614dSXin LI if (attackMaximumAmplification != -1.0f) {
1171*4543ef51SXin LI #if XML_GE == 1
1172cc68614dSXin LI XML_SetBillionLaughsAttackProtectionMaximumAmplification(
1173cc68614dSXin LI parser, attackMaximumAmplification);
1174cc68614dSXin LI #endif
1175cc68614dSXin LI }
1176cc68614dSXin LI if (attackThresholdGiven) {
1177*4543ef51SXin LI #if XML_GE == 1
1178cc68614dSXin LI XML_SetBillionLaughsAttackProtectionActivationThreshold(
1179cc68614dSXin LI parser, attackThresholdBytes);
1180cc68614dSXin LI #else
1181cc68614dSXin LI (void)attackThresholdBytes; // silence -Wunused-but-set-variable
1182cc68614dSXin LI #endif
1183be8aff81SXin LI }
1184be8aff81SXin LI
1185*4543ef51SXin LI if (disableDeferral) {
1186*4543ef51SXin LI const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
1187*4543ef51SXin LI if (! success) {
1188*4543ef51SXin LI // This prevents tperror(..) from reporting misleading "[..]: Success"
1189*4543ef51SXin LI errno = EINVAL;
1190*4543ef51SXin LI tperror(T("Failed to disable reparse deferral"));
1191*4543ef51SXin LI exit(XMLWF_EXIT_INTERNAL_ERROR);
1192*4543ef51SXin LI }
1193*4543ef51SXin LI }
1194*4543ef51SXin LI
11955bb6a25fSPoul-Henning Kamp if (requireStandalone)
11965bb6a25fSPoul-Henning Kamp XML_SetNotStandaloneHandler(parser, notStandalone);
11975bb6a25fSPoul-Henning Kamp XML_SetParamEntityParsing(parser, paramEntityParsing);
11985bb6a25fSPoul-Henning Kamp if (outputType == 't') {
11995bb6a25fSPoul-Henning Kamp /* This is for doing timings; this gives a more realistic estimate of
12005bb6a25fSPoul-Henning Kamp the parsing time. */
12015bb6a25fSPoul-Henning Kamp outputDir = 0;
12025bb6a25fSPoul-Henning Kamp XML_SetElementHandler(parser, nopStartElement, nopEndElement);
12035bb6a25fSPoul-Henning Kamp XML_SetCharacterDataHandler(parser, nopCharacterData);
12045bb6a25fSPoul-Henning Kamp XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
12056b2c1e49SXin LI } else if (outputDir) {
1206220ed979SColeman Kane const XML_Char *delim = T("/");
12075bb6a25fSPoul-Henning Kamp const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1208220ed979SColeman Kane if (! useStdin) {
1209220ed979SColeman Kane /* Jump after last (back)slash */
1210220ed979SColeman Kane const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1211220ed979SColeman Kane if (lastDelim)
1212220ed979SColeman Kane file = lastDelim + 1;
12130a48773fSEric van Gyzen #if defined(_WIN32)
1214220ed979SColeman Kane else {
1215220ed979SColeman Kane const XML_Char *winDelim = T("\\");
1216220ed979SColeman Kane lastDelim = tcsrchr(file, winDelim[0]);
1217220ed979SColeman Kane if (lastDelim) {
1218220ed979SColeman Kane file = lastDelim + 1;
1219220ed979SColeman Kane delim = winDelim;
1220220ed979SColeman Kane }
1221220ed979SColeman Kane }
12225bb6a25fSPoul-Henning Kamp #endif
1223220ed979SColeman Kane }
1224220ed979SColeman Kane outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
12255bb6a25fSPoul-Henning Kamp * sizeof(XML_Char));
1226cc68614dSXin LI if (! outName) {
1227cc68614dSXin LI tperror(T("Could not allocate memory"));
1228cc68614dSXin LI exit(XMLWF_EXIT_INTERNAL_ERROR);
1229cc68614dSXin LI }
12305bb6a25fSPoul-Henning Kamp tcscpy(outName, outputDir);
1231220ed979SColeman Kane tcscat(outName, delim);
12325bb6a25fSPoul-Henning Kamp tcscat(outName, file);
12330a48773fSEric van Gyzen userData.fp = tfopen(outName, T("wb"));
12340a48773fSEric van Gyzen if (! userData.fp) {
12355bb6a25fSPoul-Henning Kamp tperror(outName);
1236cc68614dSXin LI exitCode = XMLWF_EXIT_OUTPUT_ERROR;
1237cc68614dSXin LI free(outName);
1238ac69e5d4SEric van Gyzen XML_ParserFree(parser);
1239ac69e5d4SEric van Gyzen if (continueOnError) {
1240cc68614dSXin LI continue;
1241cc68614dSXin LI } else {
1242cc68614dSXin LI break;
1243cc68614dSXin LI }
12445bb6a25fSPoul-Henning Kamp }
12450a48773fSEric van Gyzen setvbuf(userData.fp, NULL, _IOFBF, 16384);
12465bb6a25fSPoul-Henning Kamp #ifdef XML_UNICODE
12470a48773fSEric van Gyzen puttc(0xFEFF, userData.fp);
12485bb6a25fSPoul-Henning Kamp #endif
12490a48773fSEric van Gyzen XML_SetUserData(parser, &userData);
12505bb6a25fSPoul-Henning Kamp switch (outputType) {
12515bb6a25fSPoul-Henning Kamp case 'm':
12525bb6a25fSPoul-Henning Kamp XML_UseParserAsHandlerArg(parser);
12535bb6a25fSPoul-Henning Kamp XML_SetElementHandler(parser, metaStartElement, metaEndElement);
12545bb6a25fSPoul-Henning Kamp XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
12555bb6a25fSPoul-Henning Kamp XML_SetCommentHandler(parser, metaComment);
12565bb6a25fSPoul-Henning Kamp XML_SetCdataSectionHandler(parser, metaStartCdataSection,
12575bb6a25fSPoul-Henning Kamp metaEndCdataSection);
12585bb6a25fSPoul-Henning Kamp XML_SetCharacterDataHandler(parser, metaCharacterData);
12595bb6a25fSPoul-Henning Kamp XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
12605bb6a25fSPoul-Henning Kamp metaEndDoctypeDecl);
12615bb6a25fSPoul-Henning Kamp XML_SetEntityDeclHandler(parser, metaEntityDecl);
12625bb6a25fSPoul-Henning Kamp XML_SetNotationDeclHandler(parser, metaNotationDecl);
12635bb6a25fSPoul-Henning Kamp XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
12645bb6a25fSPoul-Henning Kamp metaEndNamespaceDecl);
12655bb6a25fSPoul-Henning Kamp metaStartDocument(parser);
12665bb6a25fSPoul-Henning Kamp break;
12675bb6a25fSPoul-Henning Kamp case 'c':
12685bb6a25fSPoul-Henning Kamp XML_UseParserAsHandlerArg(parser);
12695bb6a25fSPoul-Henning Kamp XML_SetDefaultHandler(parser, markup);
12705bb6a25fSPoul-Henning Kamp XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
12715bb6a25fSPoul-Henning Kamp XML_SetCharacterDataHandler(parser, defaultCharacterData);
12725bb6a25fSPoul-Henning Kamp XML_SetProcessingInstructionHandler(parser,
12735bb6a25fSPoul-Henning Kamp defaultProcessingInstruction);
12745bb6a25fSPoul-Henning Kamp break;
12755bb6a25fSPoul-Henning Kamp default:
12765bb6a25fSPoul-Henning Kamp if (useNamespaces)
12775bb6a25fSPoul-Henning Kamp XML_SetElementHandler(parser, startElementNS, endElementNS);
12785bb6a25fSPoul-Henning Kamp else
12795bb6a25fSPoul-Henning Kamp XML_SetElementHandler(parser, startElement, endElement);
12805bb6a25fSPoul-Henning Kamp XML_SetCharacterDataHandler(parser, characterData);
12815bb6a25fSPoul-Henning Kamp #ifndef W3C14N
12825bb6a25fSPoul-Henning Kamp XML_SetProcessingInstructionHandler(parser, processingInstruction);
12830a48773fSEric van Gyzen if (requiresNotations) {
12840a48773fSEric van Gyzen XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
12850a48773fSEric van Gyzen XML_SetNotationDeclHandler(parser, notationDecl);
12860a48773fSEric van Gyzen }
12875bb6a25fSPoul-Henning Kamp #endif /* not W3C14N */
12885bb6a25fSPoul-Henning Kamp break;
12895bb6a25fSPoul-Henning Kamp }
12905bb6a25fSPoul-Henning Kamp }
12915bb6a25fSPoul-Henning Kamp if (windowsCodePages)
12925bb6a25fSPoul-Henning Kamp XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
12935bb6a25fSPoul-Henning Kamp result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
12945bb6a25fSPoul-Henning Kamp if (outputDir) {
12955bb6a25fSPoul-Henning Kamp if (outputType == 'm')
12965bb6a25fSPoul-Henning Kamp metaEndDocument(parser);
12970a48773fSEric van Gyzen fclose(userData.fp);
1298e3466a89SXin LI if (! result) {
12995bb6a25fSPoul-Henning Kamp tremove(outName);
1300e3466a89SXin LI }
13015bb6a25fSPoul-Henning Kamp free(outName);
13025bb6a25fSPoul-Henning Kamp }
13035bb6a25fSPoul-Henning Kamp XML_ParserFree(parser);
13046b2c1e49SXin LI if (! result) {
1305cc68614dSXin LI exitCode = XMLWF_EXIT_NOT_WELLFORMED;
1306cc68614dSXin LI cleanupUserData(&userData);
1307cc68614dSXin LI if (! continueOnError) {
1308cc68614dSXin LI break;
13096b2c1e49SXin LI }
13105bb6a25fSPoul-Henning Kamp }
1311cc68614dSXin LI }
1312cc68614dSXin LI return exitCode;
13135bb6a25fSPoul-Henning Kamp }
1314