xref: /freebsd/contrib/expat/xmlwf/xmlwf.c (revision 627b778d9e6b603a44a010d22d823ca7c392b363)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14    Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Copyright (c) 2020      Joe Orton <jorton@redhat.com>
18    Copyright (c) 2020      Kleber Tarcísio <klebertarcisio@yahoo.com.br>
19    Copyright (c) 2021      Tim Bray <tbray@textuality.com>
20    Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
21    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
22    Licensed under the MIT license:
23 
24    Permission is  hereby granted,  free of charge,  to any  person obtaining
25    a  copy  of  this  software   and  associated  documentation  files  (the
26    "Software"),  to  deal in  the  Software  without restriction,  including
27    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28    distribute, sublicense, and/or sell copies of the Software, and to permit
29    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30    following conditions:
31 
32    The above copyright  notice and this permission notice  shall be included
33    in all copies or substantial portions of the Software.
34 
35    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41    USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43 
44 #include "expat_config.h"
45 
46 #include <assert.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <stddef.h>
50 #include <string.h>
51 #include <math.h> /* for isnan */
52 #include <errno.h>
53 
54 #include "expat.h"
55 #include "codepage.h"
56 #include "internal.h" /* for UNUSED_P only */
57 #include "xmlfile.h"
58 #include "xmltchar.h"
59 
60 #ifdef _MSC_VER
61 #  include <crtdbg.h>
62 #endif
63 
64 #ifdef XML_UNICODE
65 #  include <wchar.h>
66 #endif
67 
/* Exit status values of the xmlwf program.  The usage text documents 0 as
   "input well-formed and output written successfully" and 1 as "could not
   allocate data structures". */
enum ExitCode {
  XMLWF_EXIT_SUCCESS = 0,
  XMLWF_EXIT_INTERNAL_ERROR = 1,
  XMLWF_EXIT_NOT_WELLFORMED = 2,
  XMLWF_EXIT_OUTPUT_ERROR = 3,
  XMLWF_EXIT_USAGE_ERROR = 4,
};
75 
76 /* Structures for handler user data */
/* Singly linked list node holding one NOTATION declaration.  The three
   strings are heap copies made by notationDecl() and released by
   freeNotations(); systemId and publicId may each be NULL. */
typedef struct NotationList {
  struct NotationList *next; /* next node, or NULL at the end of the list */
  const XML_Char *notationName;
  const XML_Char *systemId;
  const XML_Char *publicId;
} NotationList;
83 
/* State shared by the output-producing handlers. */
typedef struct xmlwfUserData {
  FILE *fp;                       /* stream the handlers write to */
  NotationList *notationListHead; /* notations collected by notationDecl() */
  /* Heap copy of the doctype name made by startDoctypeDecl(); released by
     endDoctypeDecl() and cleanupUserData(). */
  const XML_Char *currentDoctypeName;
} XmlwfUserData;
89 
/* Character the namespace-aware handlers below search for in order to split
   a name into its namespace part and its local part.
   Using this low value ensures proper sorting. */

#define NSSEP T('\001')
93 
94 static void XMLCALL
characterData(void * userData,const XML_Char * s,int len)95 characterData(void *userData, const XML_Char *s, int len) {
96   FILE *fp = ((XmlwfUserData *)userData)->fp;
97   for (; len > 0; --len, ++s) {
98     switch (*s) {
99     case T('&'):
100       fputts(T("&amp;"), fp);
101       break;
102     case T('<'):
103       fputts(T("&lt;"), fp);
104       break;
105     case T('>'):
106       fputts(T("&gt;"), fp);
107       break;
108 #ifdef W3C14N
109     case 13:
110       fputts(T("&#xD;"), fp);
111       break;
112 #else
113     case T('"'):
114       fputts(T("&quot;"), fp);
115       break;
116     case 9:
117     case 10:
118     case 13:
119       ftprintf(fp, T("&#%d;"), *s);
120       break;
121 #endif
122     default:
123       puttc(*s, fp);
124       break;
125     }
126   }
127 }
128 
129 static void
attributeValue(FILE * fp,const XML_Char * s)130 attributeValue(FILE *fp, const XML_Char *s) {
131   puttc(T('='), fp);
132   puttc(T('"'), fp);
133   assert(s);
134   for (;;) {
135     switch (*s) {
136     case 0:
137     case NSSEP:
138       puttc(T('"'), fp);
139       return;
140     case T('&'):
141       fputts(T("&amp;"), fp);
142       break;
143     case T('<'):
144       fputts(T("&lt;"), fp);
145       break;
146     case T('"'):
147       fputts(T("&quot;"), fp);
148       break;
149 #ifdef W3C14N
150     case 9:
151       fputts(T("&#x9;"), fp);
152       break;
153     case 10:
154       fputts(T("&#xA;"), fp);
155       break;
156     case 13:
157       fputts(T("&#xD;"), fp);
158       break;
159 #else
160     case T('>'):
161       fputts(T("&gt;"), fp);
162       break;
163     case 9:
164     case 10:
165     case 13:
166       ftprintf(fp, T("&#%d;"), *s);
167       break;
168 #endif
169     default:
170       puttc(*s, fp);
171       break;
172     }
173     s++;
174   }
175 }
176 
177 /* Lexicographically comparing UTF-8 encoded attribute values,
178 is equivalent to lexicographically comparing based on the character number. */
179 
180 static int
attcmp(const void * att1,const void * att2)181 attcmp(const void *att1, const void *att2) {
182   return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
183 }
184 
185 static void XMLCALL
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)186 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
187   int nAtts;
188   const XML_Char **p;
189   FILE *fp = ((XmlwfUserData *)userData)->fp;
190   puttc(T('<'), fp);
191   fputts(name, fp);
192 
193   p = atts;
194   while (*p)
195     ++p;
196   nAtts = (int)((p - atts) >> 1);
197   if (nAtts > 1)
198     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
199   while (*atts) {
200     puttc(T(' '), fp);
201     fputts(*atts++, fp);
202     attributeValue(fp, *atts);
203     atts++;
204   }
205   puttc(T('>'), fp);
206 }
207 
208 static void XMLCALL
endElement(void * userData,const XML_Char * name)209 endElement(void *userData, const XML_Char *name) {
210   FILE *fp = ((XmlwfUserData *)userData)->fp;
211   puttc(T('<'), fp);
212   puttc(T('/'), fp);
213   fputts(name, fp);
214   puttc(T('>'), fp);
215 }
216 
217 static int
nsattcmp(const void * p1,const void * p2)218 nsattcmp(const void *p1, const void *p2) {
219   const XML_Char *att1 = *(const XML_Char *const *)p1;
220   const XML_Char *att2 = *(const XML_Char *const *)p2;
221   int sep1 = (tcsrchr(att1, NSSEP) != 0);
222   int sep2 = (tcsrchr(att2, NSSEP) != 0);
223   if (sep1 != sep2)
224     return sep1 - sep2;
225   return tcscmp(att1, att2);
226 }
227 
228 static void XMLCALL
startElementNS(void * userData,const XML_Char * name,const XML_Char ** atts)229 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
230   int nAtts;
231   int nsi;
232   const XML_Char **p;
233   FILE *fp = ((XmlwfUserData *)userData)->fp;
234   const XML_Char *sep;
235   puttc(T('<'), fp);
236 
237   sep = tcsrchr(name, NSSEP);
238   if (sep) {
239     fputts(T("n1:"), fp);
240     fputts(sep + 1, fp);
241     fputts(T(" xmlns:n1"), fp);
242     attributeValue(fp, name);
243     nsi = 2;
244   } else {
245     fputts(name, fp);
246     nsi = 1;
247   }
248 
249   p = atts;
250   while (*p)
251     ++p;
252   nAtts = (int)((p - atts) >> 1);
253   if (nAtts > 1)
254     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
255   while (*atts) {
256     name = *atts++;
257     sep = tcsrchr(name, NSSEP);
258     puttc(T(' '), fp);
259     if (sep) {
260       ftprintf(fp, T("n%d:"), nsi);
261       fputts(sep + 1, fp);
262     } else
263       fputts(name, fp);
264     attributeValue(fp, *atts);
265     if (sep) {
266       ftprintf(fp, T(" xmlns:n%d"), nsi++);
267       attributeValue(fp, name);
268     }
269     atts++;
270   }
271   puttc(T('>'), fp);
272 }
273 
274 static void XMLCALL
endElementNS(void * userData,const XML_Char * name)275 endElementNS(void *userData, const XML_Char *name) {
276   FILE *fp = ((XmlwfUserData *)userData)->fp;
277   const XML_Char *sep;
278   puttc(T('<'), fp);
279   puttc(T('/'), fp);
280   sep = tcsrchr(name, NSSEP);
281   if (sep) {
282     fputts(T("n1:"), fp);
283     fputts(sep + 1, fp);
284   } else
285     fputts(name, fp);
286   puttc(T('>'), fp);
287 }
288 
289 #ifndef W3C14N
290 
291 static void XMLCALL
processingInstruction(void * userData,const XML_Char * target,const XML_Char * data)292 processingInstruction(void *userData, const XML_Char *target,
293                       const XML_Char *data) {
294   FILE *fp = ((XmlwfUserData *)userData)->fp;
295   puttc(T('<'), fp);
296   puttc(T('?'), fp);
297   fputts(target, fp);
298   puttc(T(' '), fp);
299   fputts(data, fp);
300   puttc(T('?'), fp);
301   puttc(T('>'), fp);
302 }
303 
304 static XML_Char *
xcsdup(const XML_Char * s)305 xcsdup(const XML_Char *s) {
306   XML_Char *result;
307   int count = 0;
308   size_t numBytes;
309 
310   /* Get the length of the string, including terminator */
311   while (s[count++] != 0) {
312     /* Do nothing */
313   }
314   numBytes = count * sizeof(XML_Char);
315   result = malloc(numBytes);
316   if (result == NULL)
317     return NULL;
318   memcpy(result, s, numBytes);
319   return result;
320 }
321 
322 static void XMLCALL
startDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * publid,int has_internal_subset)323 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
324                  const XML_Char *sysid, const XML_Char *publid,
325                  int has_internal_subset) {
326   XmlwfUserData *data = (XmlwfUserData *)userData;
327   UNUSED_P(sysid);
328   UNUSED_P(publid);
329   UNUSED_P(has_internal_subset);
330   data->currentDoctypeName = xcsdup(doctypeName);
331 }
332 
333 static void
freeNotations(XmlwfUserData * data)334 freeNotations(XmlwfUserData *data) {
335   NotationList *notationListHead = data->notationListHead;
336 
337   while (notationListHead != NULL) {
338     NotationList *next = notationListHead->next;
339     free((void *)notationListHead->notationName);
340     free((void *)notationListHead->systemId);
341     free((void *)notationListHead->publicId);
342     free(notationListHead);
343     notationListHead = next;
344   }
345   data->notationListHead = NULL;
346 }
347 
348 static void
cleanupUserData(XmlwfUserData * userData)349 cleanupUserData(XmlwfUserData *userData) {
350   free((void *)userData->currentDoctypeName);
351   userData->currentDoctypeName = NULL;
352   freeNotations(userData);
353 }
354 
355 static int
xcscmp(const XML_Char * xs,const XML_Char * xt)356 xcscmp(const XML_Char *xs, const XML_Char *xt) {
357   while (*xs != 0 && *xt != 0) {
358     if (*xs < *xt)
359       return -1;
360     if (*xs > *xt)
361       return 1;
362     xs++;
363     xt++;
364   }
365   if (*xs < *xt)
366     return -1;
367   if (*xs > *xt)
368     return 1;
369   return 0;
370 }
371 
372 static int
notationCmp(const void * a,const void * b)373 notationCmp(const void *a, const void *b) {
374   const NotationList *const n1 = *(const NotationList *const *)a;
375   const NotationList *const n2 = *(const NotationList *const *)b;
376 
377   return xcscmp(n1->notationName, n2->notationName);
378 }
379 
380 static void XMLCALL
endDoctypeDecl(void * userData)381 endDoctypeDecl(void *userData) {
382   XmlwfUserData *data = (XmlwfUserData *)userData;
383   NotationList **notations;
384   int notationCount = 0;
385   NotationList *p;
386   int i;
387 
388   /* How many notations do we have? */
389   for (p = data->notationListHead; p != NULL; p = p->next)
390     notationCount++;
391   if (notationCount == 0) {
392     /* Nothing to report */
393     free((void *)data->currentDoctypeName);
394     data->currentDoctypeName = NULL;
395     return;
396   }
397 
398   notations = malloc(notationCount * sizeof(NotationList *));
399   if (notations == NULL) {
400     fprintf(stderr, "Unable to sort notations");
401     freeNotations(data);
402     return;
403   }
404 
405   for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
406     notations[i] = p;
407   }
408   qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
409 
410   /* Output the DOCTYPE header */
411   fputts(T("<!DOCTYPE "), data->fp);
412   fputts(data->currentDoctypeName, data->fp);
413   fputts(T(" [\n"), data->fp);
414 
415   /* Now the NOTATIONs */
416   for (i = 0; i < notationCount; i++) {
417     fputts(T("<!NOTATION "), data->fp);
418     fputts(notations[i]->notationName, data->fp);
419     if (notations[i]->publicId != NULL) {
420       fputts(T(" PUBLIC '"), data->fp);
421       fputts(notations[i]->publicId, data->fp);
422       puttc(T('\''), data->fp);
423       if (notations[i]->systemId != NULL) {
424         puttc(T(' '), data->fp);
425         puttc(T('\''), data->fp);
426         fputts(notations[i]->systemId, data->fp);
427         puttc(T('\''), data->fp);
428       }
429     } else if (notations[i]->systemId != NULL) {
430       fputts(T(" SYSTEM '"), data->fp);
431       fputts(notations[i]->systemId, data->fp);
432       puttc(T('\''), data->fp);
433     }
434     puttc(T('>'), data->fp);
435     puttc(T('\n'), data->fp);
436   }
437 
438   /* Finally end the DOCTYPE */
439   fputts(T("]>\n"), data->fp);
440 
441   free(notations);
442   freeNotations(data);
443   free((void *)data->currentDoctypeName);
444   data->currentDoctypeName = NULL;
445 }
446 
447 static void XMLCALL
notationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)448 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
449              const XML_Char *systemId, const XML_Char *publicId) {
450   XmlwfUserData *data = (XmlwfUserData *)userData;
451   NotationList *entry = malloc(sizeof(NotationList));
452   const char *errorMessage = "Unable to store NOTATION for output\n";
453 
454   UNUSED_P(base);
455   if (entry == NULL) {
456     fputs(errorMessage, stderr);
457     return; /* Nothing we can really do about this */
458   }
459   entry->notationName = xcsdup(notationName);
460   if (entry->notationName == NULL) {
461     fputs(errorMessage, stderr);
462     free(entry);
463     return;
464   }
465   if (systemId != NULL) {
466     entry->systemId = xcsdup(systemId);
467     if (entry->systemId == NULL) {
468       fputs(errorMessage, stderr);
469       free((void *)entry->notationName);
470       free(entry);
471       return;
472     }
473   } else {
474     entry->systemId = NULL;
475   }
476   if (publicId != NULL) {
477     entry->publicId = xcsdup(publicId);
478     if (entry->publicId == NULL) {
479       fputs(errorMessage, stderr);
480       free((void *)entry->systemId); /* Safe if it's NULL */
481       free((void *)entry->notationName);
482       free(entry);
483       return;
484     }
485   } else {
486     entry->publicId = NULL;
487   }
488 
489   entry->next = data->notationListHead;
490   data->notationListHead = entry;
491 }
492 
493 #endif /* not W3C14N */
494 
495 static void XMLCALL
defaultCharacterData(void * userData,const XML_Char * s,int len)496 defaultCharacterData(void *userData, const XML_Char *s, int len) {
497   UNUSED_P(s);
498   UNUSED_P(len);
499   XML_DefaultCurrent((XML_Parser)userData);
500 }
501 
502 static void XMLCALL
defaultStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)503 defaultStartElement(void *userData, const XML_Char *name,
504                     const XML_Char **atts) {
505   UNUSED_P(name);
506   UNUSED_P(atts);
507   XML_DefaultCurrent((XML_Parser)userData);
508 }
509 
510 static void XMLCALL
defaultEndElement(void * userData,const XML_Char * name)511 defaultEndElement(void *userData, const XML_Char *name) {
512   UNUSED_P(name);
513   XML_DefaultCurrent((XML_Parser)userData);
514 }
515 
516 static void XMLCALL
defaultProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)517 defaultProcessingInstruction(void *userData, const XML_Char *target,
518                              const XML_Char *data) {
519   UNUSED_P(target);
520   UNUSED_P(data);
521   XML_DefaultCurrent((XML_Parser)userData);
522 }
523 
524 static void XMLCALL
nopCharacterData(void * userData,const XML_Char * s,int len)525 nopCharacterData(void *userData, const XML_Char *s, int len) {
526   UNUSED_P(userData);
527   UNUSED_P(s);
528   UNUSED_P(len);
529 }
530 
531 static void XMLCALL
nopStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)532 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
533   UNUSED_P(userData);
534   UNUSED_P(name);
535   UNUSED_P(atts);
536 }
537 
538 static void XMLCALL
nopEndElement(void * userData,const XML_Char * name)539 nopEndElement(void *userData, const XML_Char *name) {
540   UNUSED_P(userData);
541   UNUSED_P(name);
542 }
543 
544 static void XMLCALL
nopProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)545 nopProcessingInstruction(void *userData, const XML_Char *target,
546                          const XML_Char *data) {
547   UNUSED_P(userData);
548   UNUSED_P(target);
549   UNUSED_P(data);
550 }
551 
552 static void XMLCALL
markup(void * userData,const XML_Char * s,int len)553 markup(void *userData, const XML_Char *s, int len) {
554   FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
555   for (; len > 0; --len, ++s)
556     puttc(*s, fp);
557 }
558 
559 static void
metaLocation(XML_Parser parser)560 metaLocation(XML_Parser parser) {
561   const XML_Char *uri = XML_GetBase(parser);
562   FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
563   if (uri)
564     ftprintf(fp, T(" uri=\"%s\""), uri);
565   ftprintf(fp,
566            T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
567                T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
568                    T(XML_FMT_INT_MOD) T("u\""),
569            XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
570            XML_GetCurrentLineNumber(parser),
571            XML_GetCurrentColumnNumber(parser));
572 }
573 
574 static void
metaStartDocument(void * userData)575 metaStartDocument(void *userData) {
576   fputts(T("<document>\n"),
577          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
578 }
579 
580 static void
metaEndDocument(void * userData)581 metaEndDocument(void *userData) {
582   fputts(T("</document>\n"),
583          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
584 }
585 
586 static void XMLCALL
metaStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)587 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
588   XML_Parser parser = (XML_Parser)userData;
589   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
590   FILE *fp = data->fp;
591   const XML_Char **specifiedAttsEnd
592       = atts + XML_GetSpecifiedAttributeCount(parser);
593   const XML_Char **idAttPtr;
594   int idAttIndex = XML_GetIdAttributeIndex(parser);
595   if (idAttIndex < 0)
596     idAttPtr = 0;
597   else
598     idAttPtr = atts + idAttIndex;
599 
600   ftprintf(fp, T("<starttag name=\"%s\""), name);
601   metaLocation(parser);
602   if (*atts) {
603     fputts(T(">\n"), fp);
604     do {
605       ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
606       characterData(data, atts[1], (int)tcslen(atts[1]));
607       if (atts >= specifiedAttsEnd)
608         fputts(T("\" defaulted=\"yes\"/>\n"), fp);
609       else if (atts == idAttPtr)
610         fputts(T("\" id=\"yes\"/>\n"), fp);
611       else
612         fputts(T("\"/>\n"), fp);
613     } while (*(atts += 2));
614     fputts(T("</starttag>\n"), fp);
615   } else
616     fputts(T("/>\n"), fp);
617 }
618 
619 static void XMLCALL
metaEndElement(void * userData,const XML_Char * name)620 metaEndElement(void *userData, const XML_Char *name) {
621   XML_Parser parser = (XML_Parser)userData;
622   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
623   FILE *fp = data->fp;
624   ftprintf(fp, T("<endtag name=\"%s\""), name);
625   metaLocation(parser);
626   fputts(T("/>\n"), fp);
627 }
628 
629 static void XMLCALL
metaProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)630 metaProcessingInstruction(void *userData, const XML_Char *target,
631                           const XML_Char *data) {
632   XML_Parser parser = (XML_Parser)userData;
633   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
634   FILE *fp = usrData->fp;
635   ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
636   characterData(usrData, data, (int)tcslen(data));
637   puttc(T('"'), fp);
638   metaLocation(parser);
639   fputts(T("/>\n"), fp);
640 }
641 
642 static void XMLCALL
metaComment(void * userData,const XML_Char * data)643 metaComment(void *userData, const XML_Char *data) {
644   XML_Parser parser = (XML_Parser)userData;
645   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
646   FILE *fp = usrData->fp;
647   fputts(T("<comment data=\""), fp);
648   characterData(usrData, data, (int)tcslen(data));
649   puttc(T('"'), fp);
650   metaLocation(parser);
651   fputts(T("/>\n"), fp);
652 }
653 
654 static void XMLCALL
metaStartCdataSection(void * userData)655 metaStartCdataSection(void *userData) {
656   XML_Parser parser = (XML_Parser)userData;
657   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
658   FILE *fp = data->fp;
659   fputts(T("<startcdata"), fp);
660   metaLocation(parser);
661   fputts(T("/>\n"), fp);
662 }
663 
664 static void XMLCALL
metaEndCdataSection(void * userData)665 metaEndCdataSection(void *userData) {
666   XML_Parser parser = (XML_Parser)userData;
667   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
668   FILE *fp = data->fp;
669   fputts(T("<endcdata"), fp);
670   metaLocation(parser);
671   fputts(T("/>\n"), fp);
672 }
673 
674 static void XMLCALL
metaCharacterData(void * userData,const XML_Char * s,int len)675 metaCharacterData(void *userData, const XML_Char *s, int len) {
676   XML_Parser parser = (XML_Parser)userData;
677   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
678   FILE *fp = data->fp;
679   fputts(T("<chars str=\""), fp);
680   characterData(data, s, len);
681   puttc(T('"'), fp);
682   metaLocation(parser);
683   fputts(T("/>\n"), fp);
684 }
685 
686 static void XMLCALL
metaStartDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)687 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
688                      const XML_Char *sysid, const XML_Char *pubid,
689                      int has_internal_subset) {
690   XML_Parser parser = (XML_Parser)userData;
691   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
692   FILE *fp = data->fp;
693   UNUSED_P(sysid);
694   UNUSED_P(pubid);
695   UNUSED_P(has_internal_subset);
696   ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
697   metaLocation(parser);
698   fputts(T("/>\n"), fp);
699 }
700 
701 static void XMLCALL
metaEndDoctypeDecl(void * userData)702 metaEndDoctypeDecl(void *userData) {
703   XML_Parser parser = (XML_Parser)userData;
704   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
705   FILE *fp = data->fp;
706   fputts(T("<enddoctype"), fp);
707   metaLocation(parser);
708   fputts(T("/>\n"), fp);
709 }
710 
711 static void XMLCALL
metaNotationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)712 metaNotationDecl(void *userData, const XML_Char *notationName,
713                  const XML_Char *base, const XML_Char *systemId,
714                  const XML_Char *publicId) {
715   XML_Parser parser = (XML_Parser)userData;
716   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
717   FILE *fp = data->fp;
718   UNUSED_P(base);
719   ftprintf(fp, T("<notation name=\"%s\""), notationName);
720   if (publicId)
721     ftprintf(fp, T(" public=\"%s\""), publicId);
722   if (systemId) {
723     fputts(T(" system=\""), fp);
724     characterData(data, systemId, (int)tcslen(systemId));
725     puttc(T('"'), fp);
726   }
727   metaLocation(parser);
728   fputts(T("/>\n"), fp);
729 }
730 
731 static void XMLCALL
metaEntityDecl(void * userData,const XML_Char * entityName,int is_param,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)732 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
733                const XML_Char *value, int value_length, const XML_Char *base,
734                const XML_Char *systemId, const XML_Char *publicId,
735                const XML_Char *notationName) {
736   XML_Parser parser = (XML_Parser)userData;
737   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
738   FILE *fp = data->fp;
739 
740   UNUSED_P(is_param);
741   UNUSED_P(base);
742   if (value) {
743     ftprintf(fp, T("<entity name=\"%s\""), entityName);
744     metaLocation(parser);
745     puttc(T('>'), fp);
746     characterData(data, value, value_length);
747     fputts(T("</entity/>\n"), fp);
748   } else if (notationName) {
749     ftprintf(fp, T("<entity name=\"%s\""), entityName);
750     if (publicId)
751       ftprintf(fp, T(" public=\"%s\""), publicId);
752     fputts(T(" system=\""), fp);
753     characterData(data, systemId, (int)tcslen(systemId));
754     puttc(T('"'), fp);
755     ftprintf(fp, T(" notation=\"%s\""), notationName);
756     metaLocation(parser);
757     fputts(T("/>\n"), fp);
758   } else {
759     ftprintf(fp, T("<entity name=\"%s\""), entityName);
760     if (publicId)
761       ftprintf(fp, T(" public=\"%s\""), publicId);
762     fputts(T(" system=\""), fp);
763     characterData(data, systemId, (int)tcslen(systemId));
764     puttc(T('"'), fp);
765     metaLocation(parser);
766     fputts(T("/>\n"), fp);
767   }
768 }
769 
770 static void XMLCALL
metaStartNamespaceDecl(void * userData,const XML_Char * prefix,const XML_Char * uri)771 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
772                        const XML_Char *uri) {
773   XML_Parser parser = (XML_Parser)userData;
774   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
775   FILE *fp = data->fp;
776   fputts(T("<startns"), fp);
777   if (prefix)
778     ftprintf(fp, T(" prefix=\"%s\""), prefix);
779   if (uri) {
780     fputts(T(" ns=\""), fp);
781     characterData(data, uri, (int)tcslen(uri));
782     fputts(T("\"/>\n"), fp);
783   } else
784     fputts(T("/>\n"), fp);
785 }
786 
787 static void XMLCALL
metaEndNamespaceDecl(void * userData,const XML_Char * prefix)788 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
789   XML_Parser parser = (XML_Parser)userData;
790   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
791   FILE *fp = data->fp;
792   if (! prefix)
793     fputts(T("<endns/>\n"), fp);
794   else
795     ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
796 }
797 
798 static int XMLCALL
unknownEncodingConvert(void * data,const char * p)799 unknownEncodingConvert(void *data, const char *p) {
800   return codepageConvert(*(int *)data, p);
801 }
802 
803 static int XMLCALL
unknownEncoding(void * userData,const XML_Char * name,XML_Encoding * info)804 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
805   int cp;
806   static const XML_Char prefixL[] = T("windows-");
807   static const XML_Char prefixU[] = T("WINDOWS-");
808   int i;
809 
810   UNUSED_P(userData);
811   for (i = 0; prefixU[i]; i++)
812     if (name[i] != prefixU[i] && name[i] != prefixL[i])
813       return 0;
814 
815   cp = 0;
816   for (; name[i]; i++) {
817     static const XML_Char digits[] = T("0123456789");
818     const XML_Char *s = tcschr(digits, name[i]);
819     if (! s)
820       return 0;
821     cp *= 10;
822     cp += (int)(s - digits);
823     if (cp >= 0x10000)
824       return 0;
825   }
826   if (! codepageMap(cp, info->map))
827     return 0;
828   info->convert = unknownEncodingConvert;
829   /* We could just cast the code page integer to a void *,
830   and avoid the use of release. */
831   info->release = free;
832   info->data = malloc(sizeof(int));
833   if (! info->data)
834     return 0;
835   *(int *)info->data = cp;
836   return 1;
837 }
838 
839 static int XMLCALL
notStandalone(void * userData)840 notStandalone(void *userData) {
841   UNUSED_P(userData);
842   return 0;
843 }
844 
845 static void
showVersion(XML_Char * prog)846 showVersion(XML_Char *prog) {
847   XML_Char *s = prog;
848   XML_Char ch;
849   const XML_Feature *features = XML_GetFeatureList();
850   while ((ch = *s) != 0) {
851     if (ch == '/'
852 #if defined(_WIN32)
853         || ch == '\\'
854 #endif
855     )
856       prog = s + 1;
857     ++s;
858   }
859   ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
860   if (features != NULL && features[0].feature != XML_FEATURE_END) {
861     int i = 1;
862     ftprintf(stdout, T("%s"), features[0].name);
863     if (features[0].value)
864       ftprintf(stdout, T("=%ld"), features[0].value);
865     while (features[i].feature != XML_FEATURE_END) {
866       ftprintf(stdout, T(", %s"), features[i].name);
867       if (features[i].value)
868         ftprintf(stdout, T("=%ld"), features[i].value);
869       ++i;
870     }
871     ftprintf(stdout, T("\n"));
872   }
873 }
874 
875 #if defined(__GNUC__)
876 __attribute__((noreturn))
877 #endif
878 static void
usage(const XML_Char * prog,int rc)879 usage(const XML_Char *prog, int rc) {
880   ftprintf(
881       stderr,
882       /* Generated with:
883        * $ xmlwf/xmlwf_helpgen.sh
884        * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
885        * xmlwf/xmlwf_helpgen.sh in here.
886        */
887       /* clang-format off */
888       T("usage:\n")
889       T("  %s [OPTIONS] [FILE ...]\n")
890       T("  %s -h|--help\n")
891       T("  %s -v|--version\n")
892       T("\n")
893       T("xmlwf - Determines if an XML document is well-formed\n")
894       T("\n")
895       T("positional arguments:\n")
896       T("  FILE           file to process (default: STDIN)\n")
897       T("\n")
898       T("input control arguments:\n")
899       T("  -s             print an error if the document is not [s]tandalone\n")
900       T("  -n             enable [n]amespace processing\n")
901       T("  -p             enable processing of external DTDs and [p]arameter entities\n")
902       T("  -x             enable processing of e[x]ternal entities\n")
903       T("  -e ENCODING    override any in-document [e]ncoding declaration\n")
904       T("  -w             enable support for [W]indows code pages\n")
905       T("  -r             disable memory-mapping and use [r]ead calls instead\n")
906       T("  -g BYTES       buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
907       T("  -k             when processing multiple files, [k]eep processing after first file with error\n")
908       T("\n")
909       T("output control arguments:\n")
910       T("  -d DIRECTORY   output [d]estination directory\n")
911       T("  -c             write a [c]opy of input XML, not canonical XML\n")
912       T("  -m             write [m]eta XML, not canonical XML\n")
913       T("  -t             write no XML output for [t]iming of plain parsing\n")
914       T("  -N             enable adding doctype and [n]otation declarations\n")
915       T("\n")
916       T("amplification attack protection (e.g. billion laughs):\n")
917       T("  NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
918       T("\n")
919       T("  -a FACTOR      set maximum tolerated [a]mplification factor (default: 100.0)\n")
920       T("  -b BYTES       set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n")
921       T("\n")
922       T("reparse deferral:\n")
923       T("  -q             disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
924       T("\n")
925       T("info arguments:\n")
926       T("  -h, --help     show this [h]elp message and exit\n")
927       T("  -v, --version  show program's [v]ersion number and exit\n")
928       T("\n")
929       T("environment variables:\n")
930       T("  EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)\n")
931       T("                 Control verbosity of accounting debugging (default: 0)\n")
932       T("  EXPAT_ENTITY_DEBUG=(0|1)\n")
933       T("                 Control verbosity of entity debugging (default: 0)\n")
934       T("  EXPAT_ENTROPY_DEBUG=(0|1)\n")
935       T("                 Control verbosity of entropy debugging (default: 0)\n")
936       T("  EXPAT_MALLOC_DEBUG=(0|1|2)\n")
937       T("                 Control verbosity of allocation tracker (default: 0)\n")
938       T("\n")
939       T("exit status:\n")
940       T("  0              the input files are well-formed and the output (if requested) was written successfully\n")
941       T("  1              could not allocate data structures, signals a serious problem with execution environment\n")
942       T("  2              one or more input files were not well-formed\n")
943       T("  3              could not create an output file\n")
944       T("  4              command-line argument error\n")
945       T("\n")
946       T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
947       T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
948       , /* clang-format on */
949       prog, prog, prog);
950   exit(rc);
951 }
952 
953 #if defined(__MINGW32__) && defined(XML_UNICODE)
954 /* Silence warning about missing prototype */
955 int wmain(int argc, XML_Char **argv);
956 #endif
957 
/* Fetches the value of the option character at argv[i][j] and stores a
 * pointer to it in constCharStarTarget.
 *
 * - If text follows the option character in the same argument
 *   (e.g. "-dDIR"), the value is that remaining text.
 * - Otherwise (e.g. "-d" "DIR") the value is the next element of argv; if
 *   there is no next element, usage(..) is called with
 *   XMLWF_EXIT_USAGE_ERROR.
 *
 * Afterwards i indexes the argument following the value and j is reset
 * to 0.  Both i and j must be modifiable lvalues.
 *
 * Wrapped in do { .. } while (0) so that the invocation, together with its
 * trailing semicolon, forms exactly one statement and is safe inside
 * unbraced if/else.
 */
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j)            \
  do {                                                                         \
    if ((argv)[(i)][(j) + 1] == T('\0')) {                                     \
      if (++(i) == (argc)) {                                                   \
        usage((argv)[0], XMLWF_EXIT_USAGE_ERROR);                              \
        /* usage called exit(..), never gets here */                           \
      }                                                                        \
      (constCharStarTarget) = (argv)[(i)];                                     \
    } else {                                                                   \
      (constCharStarTarget) = (argv)[(i)] + (j) + 1;                           \
    }                                                                          \
    (i)++;                                                                     \
    (j) = 0;                                                                   \
  } while (0)
972 
973 int
tmain(int argc,XML_Char ** argv)974 tmain(int argc, XML_Char **argv) {
975   int i, j;
976   const XML_Char *outputDir = NULL;
977   const XML_Char *encoding = NULL;
978   unsigned processFlags = XML_MAP_FILE;
979   int windowsCodePages = 0;
980   int outputType = 0;
981   int useNamespaces = 0;
982   int requireStandalone = 0;
983   int requiresNotations = 0;
984   int continueOnError = 0;
985 
986   float attackMaximumAmplification = -1.0f; /* signaling "not set" */
987   unsigned long long attackThresholdBytes = 0;
988   XML_Bool attackThresholdGiven = XML_FALSE;
989 
990   XML_Bool disableDeferral = XML_FALSE;
991 
992   int exitCode = XMLWF_EXIT_SUCCESS;
993   enum XML_ParamEntityParsing paramEntityParsing
994       = XML_PARAM_ENTITY_PARSING_NEVER;
995   int useStdin = 0;
996   XmlwfUserData userData = {NULL, NULL, NULL};
997 
998 #ifdef _MSC_VER
999   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
1000 #endif
1001 
1002   i = 1;
1003   j = 0;
1004   while (i < argc) {
1005     if (j == 0) {
1006       if (argv[i][0] != T('-'))
1007         break;
1008       if (argv[i][1] == T('-')) {
1009         if (argv[i][2] == T('\0')) {
1010           i++;
1011           break;
1012         } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
1013           usage(argv[0], XMLWF_EXIT_SUCCESS);
1014           // usage called exit(..), never gets here
1015         } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
1016           showVersion(argv[0]);
1017           return XMLWF_EXIT_SUCCESS;
1018         }
1019       }
1020       j++;
1021     }
1022     switch (argv[i][j]) {
1023     case T('r'):
1024       processFlags &= ~XML_MAP_FILE;
1025       j++;
1026       break;
1027     case T('s'):
1028       requireStandalone = 1;
1029       j++;
1030       break;
1031     case T('n'):
1032       useNamespaces = 1;
1033       j++;
1034       break;
1035     case T('p'):
1036       paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
1037       /* fall through */
1038     case T('x'):
1039       processFlags |= XML_EXTERNAL_ENTITIES;
1040       j++;
1041       break;
1042     case T('w'):
1043       windowsCodePages = 1;
1044       j++;
1045       break;
1046     case T('m'):
1047       outputType = 'm';
1048       j++;
1049       break;
1050     case T('c'):
1051       outputType = 'c';
1052       useNamespaces = 0;
1053       j++;
1054       break;
1055     case T('t'):
1056       outputType = 't';
1057       j++;
1058       break;
1059     case T('N'):
1060       requiresNotations = 1;
1061       j++;
1062       break;
1063     case T('d'):
1064       XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
1065       break;
1066     case T('e'):
1067       XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
1068       break;
1069     case T('h'):
1070       usage(argv[0], XMLWF_EXIT_SUCCESS);
1071       // usage called exit(..), never gets here
1072     case T('v'):
1073       showVersion(argv[0]);
1074       return XMLWF_EXIT_SUCCESS;
1075     case T('g'): {
1076       const XML_Char *valueText = NULL;
1077       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1078 
1079       errno = 0;
1080       XML_Char *afterValueText = (XML_Char *)valueText;
1081       const long long read_size_bytes_candidate
1082           = tcstoull(valueText, &afterValueText, 10);
1083       if ((errno != 0) || (afterValueText[0] != T('\0'))
1084           || (read_size_bytes_candidate < 1)
1085           || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
1086         // This prevents tperror(..) from reporting misleading "[..]: Success"
1087         errno = ERANGE;
1088         tperror(T("invalid buffer size") T(
1089             " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
1090         exit(XMLWF_EXIT_USAGE_ERROR);
1091       }
1092       g_read_size_bytes = (int)read_size_bytes_candidate;
1093       break;
1094     }
1095     case T('k'):
1096       continueOnError = 1;
1097       j++;
1098       break;
1099     case T('a'): {
1100       const XML_Char *valueText = NULL;
1101       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1102 
1103       errno = 0;
1104       XML_Char *afterValueText = NULL;
1105       attackMaximumAmplification = tcstof(valueText, &afterValueText);
1106       if ((errno != 0) || (afterValueText[0] != T('\0'))
1107           || isnan(attackMaximumAmplification)
1108           || (attackMaximumAmplification < 1.0f)) {
1109         // This prevents tperror(..) from reporting misleading "[..]: Success"
1110         errno = ERANGE;
1111         tperror(T("invalid amplification limit") T(
1112             " (needs a floating point number greater or equal than 1.0)"));
1113         exit(XMLWF_EXIT_USAGE_ERROR);
1114       }
1115 #if XML_GE == 0
1116       ftprintf(stderr,
1117                T("Warning: Given amplification limit ignored")
1118                    T(", xmlwf has been compiled without DTD/GE support.\n"));
1119 #endif
1120       break;
1121     }
1122     case T('b'): {
1123       const XML_Char *valueText = NULL;
1124       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1125 
1126       errno = 0;
1127       XML_Char *afterValueText = (XML_Char *)valueText;
1128       attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
1129       if ((errno != 0) || (afterValueText[0] != T('\0'))) {
1130         // This prevents tperror(..) from reporting misleading "[..]: Success"
1131         errno = ERANGE;
1132         tperror(T("invalid ignore threshold")
1133                     T(" (needs an integer from 0 to 2^64-1)"));
1134         exit(XMLWF_EXIT_USAGE_ERROR);
1135       }
1136       attackThresholdGiven = XML_TRUE;
1137 #if XML_GE == 0
1138       ftprintf(stderr,
1139                T("Warning: Given attack threshold ignored")
1140                    T(", xmlwf has been compiled without DTD/GE support.\n"));
1141 #endif
1142       break;
1143     }
1144     case T('q'): {
1145       disableDeferral = XML_TRUE;
1146       j++;
1147       break;
1148     }
1149     case T('\0'):
1150       if (j > 1) {
1151         i++;
1152         j = 0;
1153         break;
1154       }
1155       /* fall through */
1156     default:
1157       usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
1158       // usage called exit(..), never gets here
1159     }
1160   }
1161   if (i == argc) {
1162     useStdin = 1;
1163     processFlags &= ~XML_MAP_FILE;
1164     i--;
1165   }
1166   for (; i < argc; i++) {
1167     XML_Char *outName = 0;
1168     int result;
1169     XML_Parser parser;
1170     if (useNamespaces)
1171       parser = XML_ParserCreateNS(encoding, NSSEP);
1172     else
1173       parser = XML_ParserCreate(encoding);
1174 
1175     if (! parser) {
1176       tperror(T("Could not instantiate parser"));
1177       exit(XMLWF_EXIT_INTERNAL_ERROR);
1178     }
1179 
1180     if (attackMaximumAmplification != -1.0f) {
1181 #if XML_GE == 1
1182       XML_SetBillionLaughsAttackProtectionMaximumAmplification(
1183           parser, attackMaximumAmplification);
1184       XML_SetAllocTrackerMaximumAmplification(parser,
1185                                               attackMaximumAmplification);
1186 #endif
1187     }
1188     if (attackThresholdGiven) {
1189 #if XML_GE == 1
1190       XML_SetBillionLaughsAttackProtectionActivationThreshold(
1191           parser, attackThresholdBytes);
1192       XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes);
1193 #else
1194       (void)attackThresholdBytes; // silence -Wunused-but-set-variable
1195 #endif
1196     }
1197 
1198     if (disableDeferral) {
1199       const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
1200       if (! success) {
1201         // This prevents tperror(..) from reporting misleading "[..]: Success"
1202         errno = EINVAL;
1203         tperror(T("Failed to disable reparse deferral"));
1204         exit(XMLWF_EXIT_INTERNAL_ERROR);
1205       }
1206     }
1207 
1208     if (requireStandalone)
1209       XML_SetNotStandaloneHandler(parser, notStandalone);
1210     XML_SetParamEntityParsing(parser, paramEntityParsing);
1211     if (outputType == 't') {
1212       /* This is for doing timings; this gives a more realistic estimate of
1213          the parsing time. */
1214       outputDir = 0;
1215       XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1216       XML_SetCharacterDataHandler(parser, nopCharacterData);
1217       XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1218     } else if (outputDir) {
1219       const XML_Char *delim = T("/");
1220       const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1221       if (! useStdin) {
1222         /* Jump after last (back)slash */
1223         const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1224         if (lastDelim)
1225           file = lastDelim + 1;
1226 #if defined(_WIN32)
1227         else {
1228           const XML_Char *winDelim = T("\\");
1229           lastDelim = tcsrchr(file, winDelim[0]);
1230           if (lastDelim) {
1231             file = lastDelim + 1;
1232             delim = winDelim;
1233           }
1234         }
1235 #endif
1236       }
1237       outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
1238                                    * sizeof(XML_Char));
1239       if (! outName) {
1240         tperror(T("Could not allocate memory"));
1241         exit(XMLWF_EXIT_INTERNAL_ERROR);
1242       }
1243       tcscpy(outName, outputDir);
1244       tcscat(outName, delim);
1245       tcscat(outName, file);
1246       userData.fp = tfopen(outName, T("wb"));
1247       if (! userData.fp) {
1248         tperror(outName);
1249         exitCode = XMLWF_EXIT_OUTPUT_ERROR;
1250         free(outName);
1251         XML_ParserFree(parser);
1252         if (continueOnError) {
1253           continue;
1254         } else {
1255           break;
1256         }
1257       }
1258       setvbuf(userData.fp, NULL, _IOFBF, 16384);
1259 #ifdef XML_UNICODE
1260       puttc(0xFEFF, userData.fp);
1261 #endif
1262       XML_SetUserData(parser, &userData);
1263       switch (outputType) {
1264       case 'm':
1265         XML_UseParserAsHandlerArg(parser);
1266         XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1267         XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1268         XML_SetCommentHandler(parser, metaComment);
1269         XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1270                                    metaEndCdataSection);
1271         XML_SetCharacterDataHandler(parser, metaCharacterData);
1272         XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1273                                   metaEndDoctypeDecl);
1274         XML_SetEntityDeclHandler(parser, metaEntityDecl);
1275         XML_SetNotationDeclHandler(parser, metaNotationDecl);
1276         XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1277                                     metaEndNamespaceDecl);
1278         metaStartDocument(parser);
1279         break;
1280       case 'c':
1281         XML_UseParserAsHandlerArg(parser);
1282         XML_SetDefaultHandler(parser, markup);
1283         XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1284         XML_SetCharacterDataHandler(parser, defaultCharacterData);
1285         XML_SetProcessingInstructionHandler(parser,
1286                                             defaultProcessingInstruction);
1287         break;
1288       default:
1289         if (useNamespaces)
1290           XML_SetElementHandler(parser, startElementNS, endElementNS);
1291         else
1292           XML_SetElementHandler(parser, startElement, endElement);
1293         XML_SetCharacterDataHandler(parser, characterData);
1294 #ifndef W3C14N
1295         XML_SetProcessingInstructionHandler(parser, processingInstruction);
1296         if (requiresNotations) {
1297           XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1298           XML_SetNotationDeclHandler(parser, notationDecl);
1299         }
1300 #endif /* not W3C14N */
1301         break;
1302       }
1303     }
1304     if (windowsCodePages)
1305       XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1306     result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1307     if (outputDir) {
1308       if (outputType == 'm')
1309         metaEndDocument(parser);
1310       fclose(userData.fp);
1311       if (! result) {
1312         tremove(outName);
1313       }
1314       free(outName);
1315     }
1316     XML_ParserFree(parser);
1317     if (! result) {
1318       exitCode = XMLWF_EXIT_NOT_WELLFORMED;
1319       cleanupUserData(&userData);
1320       if (! continueOnError) {
1321         break;
1322       }
1323     }
1324   }
1325   return exitCode;
1326 }
1327