xref: /freebsd/contrib/expat/xmlwf/xmlwf.c (revision 9cc9b8b372842b9a941d235c5e9949a214e5284f)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14    Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Copyright (c) 2020      Joe Orton <jorton@redhat.com>
18    Copyright (c) 2020      Kleber Tarcísio <klebertarcisio@yahoo.com.br>
19    Copyright (c) 2021      Tim Bray <tbray@textuality.com>
20    Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
21    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
22    Copyright (c) 2025      Alfonso Gregory <gfunni234@gmail.com>
23    Copyright (c) 2026      Matthew Fernandez <matthew.fernandez@gmail.com>
24    Licensed under the MIT license:
25 
26    Permission is  hereby granted,  free of charge,  to any  person obtaining
27    a  copy  of  this  software   and  associated  documentation  files  (the
28    "Software"),  to  deal in  the  Software  without restriction,  including
29    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
30    distribute, sublicense, and/or sell copies of the Software, and to permit
31    persons  to whom  the Software  is  furnished to  do so,  subject to  the
32    following conditions:
33 
34    The above copyright  notice and this permission notice  shall be included
35    in all copies or substantial portions of the Software.
36 
37    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
38    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
39    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
40    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
41    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
42    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
43    USE OR OTHER DEALINGS IN THE SOFTWARE.
44 */
45 
46 #include "expat_config.h"
47 
48 #include <assert.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <stddef.h>
52 #include <string.h>
53 #include <math.h> /* for isnan */
54 #include <errno.h>
55 
56 #include "expat.h"
57 #include "codepage.h"
58 #include "internal.h" /* for UNUSED_P only */
59 #include "xmlfile.h"
60 #include "xmltchar.h"
61 
62 #ifdef _MSC_VER
63 #  include <crtdbg.h>
64 #endif
65 
66 #ifdef XML_UNICODE
67 #  include <wchar.h>
68 #endif
69 
/* Process exit statuses.  The numeric values are spelled out because they
   are listed verbatim in the "exit status" section of the usage text. */
enum ExitCode {
  /* input well-formed, requested output written */
  XMLWF_EXIT_SUCCESS = 0,
  /* could not allocate data structures */
  XMLWF_EXIT_INTERNAL_ERROR = 1,
  /* one or more input files were not well-formed */
  XMLWF_EXIT_NOT_WELLFORMED = 2,
  /* could not create an output file */
  XMLWF_EXIT_OUTPUT_ERROR = 3,
  /* command-line argument error */
  XMLWF_EXIT_USAGE_ERROR = 4
};
77 
78 /* Structures for handler user data */
79 typedef struct NotationList {
80   struct NotationList *next;
81   const XML_Char *notationName;
82   const XML_Char *systemId;
83   const XML_Char *publicId;
84 } NotationList;
85 
86 typedef struct xmlwfUserData {
87   FILE *fp;
88   NotationList *notationListHead;
89   const XML_Char *currentDoctypeName;
90 } XmlwfUserData;
91 
/* Separator between namespace URI and local name in expanded names.
   Using the lowest non-NUL character value ensures proper sorting. */
93 
94 #define NSSEP T('\001')
95 
96 static void XMLCALL
characterData(void * userData,const XML_Char * s,int len)97 characterData(void *userData, const XML_Char *s, int len) {
98   FILE *fp = ((XmlwfUserData *)userData)->fp;
99   for (; len > 0; --len, ++s) {
100     switch (*s) {
101     case T('&'):
102       fputts(T("&amp;"), fp);
103       break;
104     case T('<'):
105       fputts(T("&lt;"), fp);
106       break;
107     case T('>'):
108       fputts(T("&gt;"), fp);
109       break;
110 #ifdef W3C14N
111     case 13:
112       fputts(T("&#xD;"), fp);
113       break;
114 #else
115     case T('"'):
116       fputts(T("&quot;"), fp);
117       break;
118     case 9:
119     case 10:
120     case 13:
121       ftprintf(fp, T("&#%d;"), *s);
122       break;
123 #endif
124     default:
125       puttc(*s, fp);
126       break;
127     }
128   }
129 }
130 
131 static void
attributeValue(FILE * fp,const XML_Char * s)132 attributeValue(FILE *fp, const XML_Char *s) {
133   puttc(T('='), fp);
134   puttc(T('"'), fp);
135   assert(s);
136   for (;;) {
137     switch (*s) {
138     case 0:
139     case NSSEP:
140       puttc(T('"'), fp);
141       return;
142     case T('&'):
143       fputts(T("&amp;"), fp);
144       break;
145     case T('<'):
146       fputts(T("&lt;"), fp);
147       break;
148     case T('"'):
149       fputts(T("&quot;"), fp);
150       break;
151 #ifdef W3C14N
152     case 9:
153       fputts(T("&#x9;"), fp);
154       break;
155     case 10:
156       fputts(T("&#xA;"), fp);
157       break;
158     case 13:
159       fputts(T("&#xD;"), fp);
160       break;
161 #else
162     case T('>'):
163       fputts(T("&gt;"), fp);
164       break;
165     case 9:
166     case 10:
167     case 13:
168       ftprintf(fp, T("&#%d;"), *s);
169       break;
170 #endif
171     default:
172       puttc(*s, fp);
173       break;
174     }
175     s++;
176   }
177 }
178 
179 /* Lexicographically comparing UTF-8 encoded attribute values,
180 is equivalent to lexicographically comparing based on the character number. */
181 
182 static int
attcmp(const void * att1,const void * att2)183 attcmp(const void *att1, const void *att2) {
184   return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
185 }
186 
187 static void XMLCALL
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)188 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
189   int nAtts;
190   const XML_Char **p;
191   FILE *fp = ((XmlwfUserData *)userData)->fp;
192   puttc(T('<'), fp);
193   fputts(name, fp);
194 
195   p = atts;
196   while (*p)
197     ++p;
198   nAtts = (int)((p - atts) >> 1);
199   if (nAtts > 1)
200     qsort(atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
201   while (*atts) {
202     puttc(T(' '), fp);
203     fputts(*atts++, fp);
204     attributeValue(fp, *atts);
205     atts++;
206   }
207   puttc(T('>'), fp);
208 }
209 
210 static void XMLCALL
endElement(void * userData,const XML_Char * name)211 endElement(void *userData, const XML_Char *name) {
212   FILE *fp = ((XmlwfUserData *)userData)->fp;
213   puttc(T('<'), fp);
214   puttc(T('/'), fp);
215   fputts(name, fp);
216   puttc(T('>'), fp);
217 }
218 
219 static int
nsattcmp(const void * p1,const void * p2)220 nsattcmp(const void *p1, const void *p2) {
221   const XML_Char *att1 = *(const XML_Char *const *)p1;
222   const XML_Char *att2 = *(const XML_Char *const *)p2;
223   int sep1 = (tcsrchr(att1, NSSEP) != 0);
224   int sep2 = (tcsrchr(att2, NSSEP) != 0);
225   if (sep1 != sep2)
226     return sep1 - sep2;
227   return tcscmp(att1, att2);
228 }
229 
230 static void XMLCALL
startElementNS(void * userData,const XML_Char * name,const XML_Char ** atts)231 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
232   int nAtts;
233   int nsi;
234   const XML_Char **p;
235   FILE *fp = ((XmlwfUserData *)userData)->fp;
236   const XML_Char *sep;
237   puttc(T('<'), fp);
238 
239   sep = tcsrchr(name, NSSEP);
240   if (sep) {
241     fputts(T("n1:"), fp);
242     fputts(sep + 1, fp);
243     fputts(T(" xmlns:n1"), fp);
244     attributeValue(fp, name);
245     nsi = 2;
246   } else {
247     fputts(name, fp);
248     nsi = 1;
249   }
250 
251   p = atts;
252   while (*p)
253     ++p;
254   nAtts = (int)((p - atts) >> 1);
255   if (nAtts > 1)
256     qsort(atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
257   while (*atts) {
258     name = *atts++;
259     sep = tcsrchr(name, NSSEP);
260     puttc(T(' '), fp);
261     if (sep) {
262       ftprintf(fp, T("n%d:"), nsi);
263       fputts(sep + 1, fp);
264     } else
265       fputts(name, fp);
266     attributeValue(fp, *atts);
267     if (sep) {
268       ftprintf(fp, T(" xmlns:n%d"), nsi++);
269       attributeValue(fp, name);
270     }
271     atts++;
272   }
273   puttc(T('>'), fp);
274 }
275 
276 static void XMLCALL
endElementNS(void * userData,const XML_Char * name)277 endElementNS(void *userData, const XML_Char *name) {
278   FILE *fp = ((XmlwfUserData *)userData)->fp;
279   const XML_Char *sep;
280   puttc(T('<'), fp);
281   puttc(T('/'), fp);
282   sep = tcsrchr(name, NSSEP);
283   if (sep) {
284     fputts(T("n1:"), fp);
285     fputts(sep + 1, fp);
286   } else
287     fputts(name, fp);
288   puttc(T('>'), fp);
289 }
290 
291 #ifndef W3C14N
292 
293 static void XMLCALL
processingInstruction(void * userData,const XML_Char * target,const XML_Char * data)294 processingInstruction(void *userData, const XML_Char *target,
295                       const XML_Char *data) {
296   FILE *fp = ((XmlwfUserData *)userData)->fp;
297   puttc(T('<'), fp);
298   puttc(T('?'), fp);
299   fputts(target, fp);
300   puttc(T(' '), fp);
301   fputts(data, fp);
302   puttc(T('?'), fp);
303   puttc(T('>'), fp);
304 }
305 
306 static XML_Char *
xcsdup(const XML_Char * s)307 xcsdup(const XML_Char *s) {
308   XML_Char *result;
309   int count = 0;
310   size_t numBytes;
311 
312   /* Get the length of the string, including terminator */
313   while (s[count++] != 0) {
314     /* Do nothing */
315   }
316   numBytes = count * sizeof(XML_Char);
317   result = malloc(numBytes);
318   if (result == NULL)
319     return NULL;
320   memcpy(result, s, numBytes);
321   return result;
322 }
323 
324 static void XMLCALL
startDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * publid,int has_internal_subset)325 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
326                  const XML_Char *sysid, const XML_Char *publid,
327                  int has_internal_subset) {
328   XmlwfUserData *data = userData;
329   UNUSED_P(sysid);
330   UNUSED_P(publid);
331   UNUSED_P(has_internal_subset);
332   data->currentDoctypeName = xcsdup(doctypeName);
333 }
334 
335 static void
freeNotations(XmlwfUserData * data)336 freeNotations(XmlwfUserData *data) {
337   NotationList *notationListHead = data->notationListHead;
338 
339   while (notationListHead != NULL) {
340     NotationList *next = notationListHead->next;
341     free((void *)notationListHead->notationName);
342     free((void *)notationListHead->systemId);
343     free((void *)notationListHead->publicId);
344     free(notationListHead);
345     notationListHead = next;
346   }
347   data->notationListHead = NULL;
348 }
349 
350 static void
cleanupUserData(XmlwfUserData * userData)351 cleanupUserData(XmlwfUserData *userData) {
352   free((void *)userData->currentDoctypeName);
353   userData->currentDoctypeName = NULL;
354   freeNotations(userData);
355 }
356 
357 static int
xcscmp(const XML_Char * xs,const XML_Char * xt)358 xcscmp(const XML_Char *xs, const XML_Char *xt) {
359   while (*xs != 0 && *xt != 0) {
360     if (*xs < *xt)
361       return -1;
362     if (*xs > *xt)
363       return 1;
364     xs++;
365     xt++;
366   }
367   if (*xs < *xt)
368     return -1;
369   if (*xs > *xt)
370     return 1;
371   return 0;
372 }
373 
374 static int
notationCmp(const void * a,const void * b)375 notationCmp(const void *a, const void *b) {
376   const NotationList *const n1 = *(const NotationList *const *)a;
377   const NotationList *const n2 = *(const NotationList *const *)b;
378 
379   return xcscmp(n1->notationName, n2->notationName);
380 }
381 
382 static void XMLCALL
endDoctypeDecl(void * userData)383 endDoctypeDecl(void *userData) {
384   XmlwfUserData *data = userData;
385   NotationList **notations;
386   int notationCount = 0;
387   NotationList *p;
388   int i;
389 
390   /* How many notations do we have? */
391   for (p = data->notationListHead; p != NULL; p = p->next)
392     notationCount++;
393   if (notationCount == 0) {
394     /* Nothing to report */
395     goto cleanUp;
396   }
397 
398   notations = malloc(notationCount * sizeof(NotationList *));
399   if (notations == NULL) {
400     fprintf(stderr, "Unable to sort notations");
401     goto cleanUp;
402   }
403 
404   for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
405     notations[i] = p;
406   }
407   qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
408 
409   /* Output the DOCTYPE header */
410   fputts(T("<!DOCTYPE "), data->fp);
411   fputts(data->currentDoctypeName, data->fp);
412   fputts(T(" [\n"), data->fp);
413 
414   /* Now the NOTATIONs */
415   for (i = 0; i < notationCount; i++) {
416     fputts(T("<!NOTATION "), data->fp);
417     fputts(notations[i]->notationName, data->fp);
418     if (notations[i]->publicId != NULL) {
419       fputts(T(" PUBLIC '"), data->fp);
420       fputts(notations[i]->publicId, data->fp);
421       puttc(T('\''), data->fp);
422       if (notations[i]->systemId != NULL) {
423         puttc(T(' '), data->fp);
424         puttc(T('\''), data->fp);
425         fputts(notations[i]->systemId, data->fp);
426         puttc(T('\''), data->fp);
427       }
428     } else if (notations[i]->systemId != NULL) {
429       fputts(T(" SYSTEM '"), data->fp);
430       fputts(notations[i]->systemId, data->fp);
431       puttc(T('\''), data->fp);
432     }
433     puttc(T('>'), data->fp);
434     puttc(T('\n'), data->fp);
435   }
436 
437   /* Finally end the DOCTYPE */
438   fputts(T("]>\n"), data->fp);
439 
440   free(notations);
441 
442 cleanUp:
443   freeNotations(data);
444   free((void *)data->currentDoctypeName);
445   data->currentDoctypeName = NULL;
446 }
447 
448 static void XMLCALL
notationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)449 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
450              const XML_Char *systemId, const XML_Char *publicId) {
451   XmlwfUserData *data = userData;
452   NotationList *entry = malloc(sizeof(NotationList));
453   const char *errorMessage = "Unable to store NOTATION for output\n";
454 
455   UNUSED_P(base);
456   if (entry == NULL) {
457     fputs(errorMessage, stderr);
458     return; /* Nothing we can really do about this */
459   }
460   entry->notationName = xcsdup(notationName);
461   if (entry->notationName == NULL) {
462     fputs(errorMessage, stderr);
463     free(entry);
464     return;
465   }
466   if (systemId != NULL) {
467     entry->systemId = xcsdup(systemId);
468     if (entry->systemId == NULL) {
469       fputs(errorMessage, stderr);
470       free((void *)entry->notationName);
471       free(entry);
472       return;
473     }
474   } else {
475     entry->systemId = NULL;
476   }
477   if (publicId != NULL) {
478     entry->publicId = xcsdup(publicId);
479     if (entry->publicId == NULL) {
480       fputs(errorMessage, stderr);
481       free((void *)entry->systemId); /* Safe if it's NULL */
482       free((void *)entry->notationName);
483       free(entry);
484       return;
485     }
486   } else {
487     entry->publicId = NULL;
488   }
489 
490   entry->next = data->notationListHead;
491   data->notationListHead = entry;
492 }
493 
494 #endif /* not W3C14N */
495 
496 static void XMLCALL
defaultCharacterData(void * userData,const XML_Char * s,int len)497 defaultCharacterData(void *userData, const XML_Char *s, int len) {
498   UNUSED_P(s);
499   UNUSED_P(len);
500   XML_DefaultCurrent(userData);
501 }
502 
503 static void XMLCALL
defaultStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)504 defaultStartElement(void *userData, const XML_Char *name,
505                     const XML_Char **atts) {
506   UNUSED_P(name);
507   UNUSED_P(atts);
508   XML_DefaultCurrent(userData);
509 }
510 
511 static void XMLCALL
defaultEndElement(void * userData,const XML_Char * name)512 defaultEndElement(void *userData, const XML_Char *name) {
513   UNUSED_P(name);
514   XML_DefaultCurrent(userData);
515 }
516 
517 static void XMLCALL
defaultProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)518 defaultProcessingInstruction(void *userData, const XML_Char *target,
519                              const XML_Char *data) {
520   UNUSED_P(target);
521   UNUSED_P(data);
522   XML_DefaultCurrent(userData);
523 }
524 
525 static void XMLCALL
nopCharacterData(void * userData,const XML_Char * s,int len)526 nopCharacterData(void *userData, const XML_Char *s, int len) {
527   UNUSED_P(userData);
528   UNUSED_P(s);
529   UNUSED_P(len);
530 }
531 
532 static void XMLCALL
nopStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)533 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
534   UNUSED_P(userData);
535   UNUSED_P(name);
536   UNUSED_P(atts);
537 }
538 
539 static void XMLCALL
nopEndElement(void * userData,const XML_Char * name)540 nopEndElement(void *userData, const XML_Char *name) {
541   UNUSED_P(userData);
542   UNUSED_P(name);
543 }
544 
545 static void XMLCALL
nopProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)546 nopProcessingInstruction(void *userData, const XML_Char *target,
547                          const XML_Char *data) {
548   UNUSED_P(userData);
549   UNUSED_P(target);
550   UNUSED_P(data);
551 }
552 
553 static void XMLCALL
markup(void * userData,const XML_Char * s,int len)554 markup(void *userData, const XML_Char *s, int len) {
555   FILE *fp = ((XmlwfUserData *)XML_GetUserData(userData))->fp;
556   for (; len > 0; --len, ++s)
557     puttc(*s, fp);
558 }
559 
560 static void
metaLocation(XML_Parser parser)561 metaLocation(XML_Parser parser) {
562   const XML_Char *uri = XML_GetBase(parser);
563   FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
564   if (uri)
565     ftprintf(fp, T(" uri=\"%s\""), uri);
566   ftprintf(fp,
567            T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
568                T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
569                    T(XML_FMT_INT_MOD) T("u\""),
570            XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
571            XML_GetCurrentLineNumber(parser),
572            XML_GetCurrentColumnNumber(parser));
573 }
574 
575 static void
metaStartDocument(void * userData)576 metaStartDocument(void *userData) {
577   fputts(T("<document>\n"), ((XmlwfUserData *)XML_GetUserData(userData))->fp);
578 }
579 
580 static void
metaEndDocument(void * userData)581 metaEndDocument(void *userData) {
582   fputts(T("</document>\n"), ((XmlwfUserData *)XML_GetUserData(userData))->fp);
583 }
584 
585 static void XMLCALL
metaStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)586 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
587   XML_Parser parser = userData;
588   XmlwfUserData *data = XML_GetUserData(parser);
589   FILE *fp = data->fp;
590   const XML_Char **specifiedAttsEnd
591       = atts + XML_GetSpecifiedAttributeCount(parser);
592   const XML_Char **idAttPtr;
593   int idAttIndex = XML_GetIdAttributeIndex(parser);
594   if (idAttIndex < 0)
595     idAttPtr = 0;
596   else
597     idAttPtr = atts + idAttIndex;
598 
599   ftprintf(fp, T("<starttag name=\"%s\""), name);
600   metaLocation(parser);
601   if (*atts) {
602     fputts(T(">\n"), fp);
603     do {
604       ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
605       characterData(data, atts[1], (int)tcslen(atts[1]));
606       if (atts >= specifiedAttsEnd)
607         fputts(T("\" defaulted=\"yes\"/>\n"), fp);
608       else if (atts == idAttPtr)
609         fputts(T("\" id=\"yes\"/>\n"), fp);
610       else
611         fputts(T("\"/>\n"), fp);
612     } while (*(atts += 2));
613     fputts(T("</starttag>\n"), fp);
614   } else
615     fputts(T("/>\n"), fp);
616 }
617 
618 static void XMLCALL
metaEndElement(void * userData,const XML_Char * name)619 metaEndElement(void *userData, const XML_Char *name) {
620   XML_Parser parser = userData;
621   XmlwfUserData *data = XML_GetUserData(parser);
622   FILE *fp = data->fp;
623   ftprintf(fp, T("<endtag name=\"%s\""), name);
624   metaLocation(parser);
625   fputts(T("/>\n"), fp);
626 }
627 
628 static void XMLCALL
metaProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)629 metaProcessingInstruction(void *userData, const XML_Char *target,
630                           const XML_Char *data) {
631   XML_Parser parser = userData;
632   XmlwfUserData *usrData = XML_GetUserData(parser);
633   FILE *fp = usrData->fp;
634   ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
635   characterData(usrData, data, (int)tcslen(data));
636   puttc(T('"'), fp);
637   metaLocation(parser);
638   fputts(T("/>\n"), fp);
639 }
640 
641 static void XMLCALL
metaComment(void * userData,const XML_Char * data)642 metaComment(void *userData, const XML_Char *data) {
643   XML_Parser parser = userData;
644   XmlwfUserData *usrData = XML_GetUserData(parser);
645   FILE *fp = usrData->fp;
646   fputts(T("<comment data=\""), fp);
647   characterData(usrData, data, (int)tcslen(data));
648   puttc(T('"'), fp);
649   metaLocation(parser);
650   fputts(T("/>\n"), fp);
651 }
652 
653 static void XMLCALL
metaStartCdataSection(void * userData)654 metaStartCdataSection(void *userData) {
655   XML_Parser parser = userData;
656   XmlwfUserData *data = XML_GetUserData(parser);
657   FILE *fp = data->fp;
658   fputts(T("<startcdata"), fp);
659   metaLocation(parser);
660   fputts(T("/>\n"), fp);
661 }
662 
663 static void XMLCALL
metaEndCdataSection(void * userData)664 metaEndCdataSection(void *userData) {
665   XML_Parser parser = userData;
666   XmlwfUserData *data = XML_GetUserData(parser);
667   FILE *fp = data->fp;
668   fputts(T("<endcdata"), fp);
669   metaLocation(parser);
670   fputts(T("/>\n"), fp);
671 }
672 
673 static void XMLCALL
metaCharacterData(void * userData,const XML_Char * s,int len)674 metaCharacterData(void *userData, const XML_Char *s, int len) {
675   XML_Parser parser = userData;
676   XmlwfUserData *data = XML_GetUserData(parser);
677   FILE *fp = data->fp;
678   fputts(T("<chars str=\""), fp);
679   characterData(data, s, len);
680   puttc(T('"'), fp);
681   metaLocation(parser);
682   fputts(T("/>\n"), fp);
683 }
684 
685 static void XMLCALL
metaStartDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)686 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
687                      const XML_Char *sysid, const XML_Char *pubid,
688                      int has_internal_subset) {
689   XML_Parser parser = userData;
690   XmlwfUserData *data = XML_GetUserData(parser);
691   FILE *fp = data->fp;
692   UNUSED_P(sysid);
693   UNUSED_P(pubid);
694   UNUSED_P(has_internal_subset);
695   ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
696   metaLocation(parser);
697   fputts(T("/>\n"), fp);
698 }
699 
700 static void XMLCALL
metaEndDoctypeDecl(void * userData)701 metaEndDoctypeDecl(void *userData) {
702   XML_Parser parser = userData;
703   XmlwfUserData *data = XML_GetUserData(parser);
704   FILE *fp = data->fp;
705   fputts(T("<enddoctype"), fp);
706   metaLocation(parser);
707   fputts(T("/>\n"), fp);
708 }
709 
710 static void XMLCALL
metaNotationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)711 metaNotationDecl(void *userData, const XML_Char *notationName,
712                  const XML_Char *base, const XML_Char *systemId,
713                  const XML_Char *publicId) {
714   XML_Parser parser = userData;
715   XmlwfUserData *data = XML_GetUserData(parser);
716   FILE *fp = data->fp;
717   UNUSED_P(base);
718   ftprintf(fp, T("<notation name=\"%s\""), notationName);
719   if (publicId)
720     ftprintf(fp, T(" public=\"%s\""), publicId);
721   if (systemId) {
722     fputts(T(" system=\""), fp);
723     characterData(data, systemId, (int)tcslen(systemId));
724     puttc(T('"'), fp);
725   }
726   metaLocation(parser);
727   fputts(T("/>\n"), fp);
728 }
729 
730 static void XMLCALL
metaEntityDecl(void * userData,const XML_Char * entityName,int is_param,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)731 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
732                const XML_Char *value, int value_length, const XML_Char *base,
733                const XML_Char *systemId, const XML_Char *publicId,
734                const XML_Char *notationName) {
735   XML_Parser parser = userData;
736   XmlwfUserData *data = XML_GetUserData(parser);
737   FILE *fp = data->fp;
738 
739   UNUSED_P(is_param);
740   UNUSED_P(base);
741   if (value) {
742     ftprintf(fp, T("<entity name=\"%s\""), entityName);
743     metaLocation(parser);
744     puttc(T('>'), fp);
745     characterData(data, value, value_length);
746     fputts(T("</entity/>\n"), fp);
747   } else if (notationName) {
748     ftprintf(fp, T("<entity name=\"%s\""), entityName);
749     if (publicId)
750       ftprintf(fp, T(" public=\"%s\""), publicId);
751     fputts(T(" system=\""), fp);
752     characterData(data, systemId, (int)tcslen(systemId));
753     puttc(T('"'), fp);
754     ftprintf(fp, T(" notation=\"%s\""), notationName);
755     metaLocation(parser);
756     fputts(T("/>\n"), fp);
757   } else {
758     ftprintf(fp, T("<entity name=\"%s\""), entityName);
759     if (publicId)
760       ftprintf(fp, T(" public=\"%s\""), publicId);
761     fputts(T(" system=\""), fp);
762     characterData(data, systemId, (int)tcslen(systemId));
763     puttc(T('"'), fp);
764     metaLocation(parser);
765     fputts(T("/>\n"), fp);
766   }
767 }
768 
769 static void XMLCALL
metaStartNamespaceDecl(void * userData,const XML_Char * prefix,const XML_Char * uri)770 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
771                        const XML_Char *uri) {
772   XML_Parser parser = userData;
773   XmlwfUserData *data = XML_GetUserData(parser);
774   FILE *fp = data->fp;
775   fputts(T("<startns"), fp);
776   if (prefix)
777     ftprintf(fp, T(" prefix=\"%s\""), prefix);
778   if (uri) {
779     fputts(T(" ns=\""), fp);
780     characterData(data, uri, (int)tcslen(uri));
781     fputts(T("\"/>\n"), fp);
782   } else
783     fputts(T("/>\n"), fp);
784 }
785 
786 static void XMLCALL
metaEndNamespaceDecl(void * userData,const XML_Char * prefix)787 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
788   XML_Parser parser = userData;
789   XmlwfUserData *data = XML_GetUserData(parser);
790   FILE *fp = data->fp;
791   if (! prefix)
792     fputts(T("<endns/>\n"), fp);
793   else
794     ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
795 }
796 
797 static int XMLCALL
unknownEncodingConvert(void * data,const char * p)798 unknownEncodingConvert(void *data, const char *p) {
799   return codepageConvert(*(int *)data, p);
800 }
801 
802 static int XMLCALL
unknownEncoding(void * userData,const XML_Char * name,XML_Encoding * info)803 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
804   int cp;
805   static const XML_Char prefixL[] = T("windows-");
806   static const XML_Char prefixU[] = T("WINDOWS-");
807   int i;
808 
809   UNUSED_P(userData);
810   for (i = 0; prefixU[i]; i++)
811     if (name[i] != prefixU[i] && name[i] != prefixL[i])
812       return 0;
813 
814   cp = 0;
815   for (; name[i]; i++) {
816     static const XML_Char digits[] = T("0123456789");
817     const XML_Char *s = tcschr(digits, name[i]);
818     if (! s)
819       return 0;
820     cp *= 10;
821     cp += (int)(s - digits);
822     if (cp >= 0x10000)
823       return 0;
824   }
825   if (! codepageMap(cp, info->map))
826     return 0;
827   info->convert = unknownEncodingConvert;
828   /* We could just cast the code page integer to a void *,
829   and avoid the use of release. */
830   info->release = free;
831   info->data = malloc(sizeof(int));
832   if (! info->data)
833     return 0;
834   *(int *)info->data = cp;
835   return 1;
836 }
837 
838 static int XMLCALL
notStandalone(void * userData)839 notStandalone(void *userData) {
840   UNUSED_P(userData);
841   return 0;
842 }
843 
844 static void
showVersion(XML_Char * prog)845 showVersion(XML_Char *prog) {
846   XML_Char *s = prog;
847   XML_Char ch;
848   const XML_Feature *features = XML_GetFeatureList();
849   while ((ch = *s) != 0) {
850     if (ch == '/'
851 #if defined(_WIN32)
852         || ch == '\\'
853 #endif
854     )
855       prog = s + 1;
856     ++s;
857   }
858   ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
859   if (features != NULL && features[0].feature != XML_FEATURE_END) {
860     int i = 1;
861     ftprintf(stdout, T("%s"), features[0].name);
862     if (features[0].value)
863       ftprintf(stdout, T("=%ld"), features[0].value);
864     while (features[i].feature != XML_FEATURE_END) {
865       ftprintf(stdout, T(", %s"), features[i].name);
866       if (features[i].value)
867         ftprintf(stdout, T("=%ld"), features[i].value);
868       ++i;
869     }
870     ftprintf(stdout, T("\n"));
871   }
872 }
873 
874 #if defined(__GNUC__)
875 __attribute__((noreturn))
876 #endif
877 static void
usage(const XML_Char * prog,int rc)878 usage(const XML_Char *prog, int rc) {
879   ftprintf(
880       stderr,
881       /* Generated with:
882        * $ xmlwf/xmlwf_helpgen.sh
883        * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
884        * xmlwf/xmlwf_helpgen.sh in here.
885        */
886       /* clang-format off */
887       T("usage:\n")
888       T("  %s [OPTIONS] [FILE ...]\n")
889       T("  %s -h|--help\n")
890       T("  %s -v|--version\n")
891       T("\n")
892       T("xmlwf - Determines if an XML document is well-formed\n")
893       T("\n")
894       T("positional arguments:\n")
895       T("  FILE           file to process (default: STDIN)\n")
896       T("\n")
897       T("input control arguments:\n")
898       T("  -s             print an error if the document is not [s]tandalone\n")
899       T("  -n             enable [n]amespace processing\n")
900       T("  -p             enable processing of external DTDs and [p]arameter entities\n")
901       T("  -x             enable processing of e[x]ternal entities\n")
902       T("                 (CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).)\n")
903       T("  -e ENCODING    override any in-document [e]ncoding declaration\n")
904       T("  -w             enable support for [W]indows code pages\n")
905       T("  -r             disable memory-mapping and use [r]ead calls instead\n")
906       T("  -g BYTES       buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
907       T("  -k             when processing multiple files, [k]eep processing after first file with error\n")
908       T("\n")
909       T("output control arguments:\n")
910       T("  -d DIRECTORY   output [d]estination directory\n")
911       T("  -c             write a [c]opy of input XML, not canonical XML\n")
912       T("  -m             write [m]eta XML, not canonical XML\n")
913       T("  -t             write no XML output for [t]iming of plain parsing\n")
914       T("  -N             enable adding doctype and [n]otation declarations\n")
915       T("\n")
916       T("amplification attack protection (e.g. billion laughs):\n")
917       T("  NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
918       T("\n")
919       T("  -a FACTOR      set maximum tolerated [a]mplification factor (default: 100.0)\n")
920       T("  -b BYTES       set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n")
921       T("\n")
922       T("reparse deferral:\n")
923       T("  -q             disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
924       T("\n")
925       T("info arguments:\n")
926       T("  -h, --help     show this [h]elp message and exit\n")
927       T("  -v, --version  show program's [v]ersion number and exit\n")
928       T("\n")
929       T("environment variables:\n")
930       T("  EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)\n")
931       T("                 Control verbosity of accounting debugging (default: 0)\n")
932       T("  EXPAT_ENTITY_DEBUG=(0|1)\n")
933       T("                 Control verbosity of entity debugging (default: 0)\n")
934       T("  EXPAT_ENTROPY_DEBUG=(0|1)\n")
935       T("                 Control verbosity of entropy debugging (default: 0)\n")
936       T("  EXPAT_MALLOC_DEBUG=(0|1|2)\n")
937       T("                 Control verbosity of allocation tracker (default: 0)\n")
938       T("\n")
939       T("exit status:\n")
940       T("  0              the input files are well-formed and the output (if requested) was written successfully\n")
941       T("  1              could not allocate data structures, signals a serious problem with execution environment\n")
942       T("  2              one or more input files were not well-formed\n")
943       T("  3              could not create an output file\n")
944       T("  4              command-line argument error\n")
945       T("\n")
946       T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
947       T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
948       , /* clang-format on */
949       prog, prog, prog);
950   exit(rc);
951 }
952 
#if defined(__MINGW32__) && defined(XML_UNICODE)
/* Silence warning about missing prototype.
   Only MinGW builds with XML_UNICODE defined get this forward declaration.
   NOTE(review): presumably tmain (defined below) expands to wmain in such
   builds -- confirm against the header that defines tmain. */
int wmain(int argc, XML_Char **argv);
#endif
957 
/* Fetch the value of a command-line option that takes an argument.

   On entry, argv[i][j] is the option letter.  Two spellings are handled:
     - attached value ("-dDIR"):  the value is the rest of argv[i];
     - detached value ("-d DIR"): the value is the next element of argv;
       if there is no next element, usage(..) is called with
       XMLWF_EXIT_USAGE_ERROR.

   The value pointer is stored into constCharStarTarget.  Afterwards i
   indexes the first argv element after the value and j is reset to 0.

   constCharStarTarget, i and j must be modifiable lvalues; i and j are
   evaluated more than once, so pass plain variables only.

   Wrapped in do { .. } while (0) so that the macro behaves like a single
   statement, e.g. in an unbraced if/else. */
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j)            \
  do {                                                                         \
    if ((argv)[i][(j) + 1] == T('\0')) {                                       \
      if (++(i) == (argc)) {                                                   \
        usage((argv)[0], XMLWF_EXIT_USAGE_ERROR);                              \
        /* usage called exit(..), never gets here */                           \
      }                                                                        \
      (constCharStarTarget) = (argv)[i];                                       \
    } else {                                                                   \
      (constCharStarTarget) = (argv)[i] + (j) + 1;                             \
    }                                                                          \
    (i)++;                                                                     \
    (j) = 0;                                                                   \
  } while (0)
972 
973 int
tmain(int argc,XML_Char ** argv)974 tmain(int argc, XML_Char **argv) {
975   int i, j;
976   const XML_Char *outputDir = NULL;
977   const XML_Char *encoding = NULL;
978   unsigned processFlags = XML_MAP_FILE;
979   int windowsCodePages = 0;
980   int outputType = 0;
981   int useNamespaces = 0;
982   int requireStandalone = 0;
983   int requiresNotations = 0;
984   int continueOnError = 0;
985 
986   float attackMaximumAmplification = -1.0f; /* signaling "not set" */
987   unsigned long long attackThresholdBytes = 0;
988   XML_Bool attackThresholdGiven = XML_FALSE;
989 
990   XML_Bool disableDeferral = XML_FALSE;
991 
992   int exitCode = XMLWF_EXIT_SUCCESS;
993   enum XML_ParamEntityParsing paramEntityParsing
994       = XML_PARAM_ENTITY_PARSING_NEVER;
995   int useStdin = 0;
996   XmlwfUserData userData = {NULL, NULL, NULL};
997 
998 #ifdef _MSC_VER
999   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
1000 #endif
1001 
1002   i = 1;
1003   j = 0;
1004   while (i < argc) {
1005     if (j == 0) {
1006       if (argv[i][0] != T('-'))
1007         break;
1008       if (argv[i][1] == T('-')) {
1009         if (argv[i][2] == T('\0')) {
1010           i++;
1011           break;
1012         } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
1013           usage(argv[0], XMLWF_EXIT_SUCCESS);
1014           // usage called exit(..), never gets here
1015         } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
1016           showVersion(argv[0]);
1017           return XMLWF_EXIT_SUCCESS;
1018         }
1019       }
1020       j++;
1021     }
1022     switch (argv[i][j]) {
1023     case T('r'):
1024       processFlags &= ~XML_MAP_FILE;
1025       j++;
1026       break;
1027     case T('s'):
1028       requireStandalone = 1;
1029       j++;
1030       break;
1031     case T('n'):
1032       useNamespaces = 1;
1033       j++;
1034       break;
1035     case T('p'):
1036       paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
1037       /* fall through */
1038     case T('x'):
1039       processFlags |= XML_EXTERNAL_ENTITIES;
1040       j++;
1041       break;
1042     case T('w'):
1043       windowsCodePages = 1;
1044       j++;
1045       break;
1046     case T('m'):
1047       outputType = 'm';
1048       j++;
1049       break;
1050     case T('c'):
1051       outputType = 'c';
1052       useNamespaces = 0;
1053       j++;
1054       break;
1055     case T('t'):
1056       outputType = 't';
1057       j++;
1058       break;
1059     case T('N'):
1060       requiresNotations = 1;
1061       j++;
1062       break;
1063     case T('d'):
1064       XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
1065       break;
1066     case T('e'):
1067       XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
1068       break;
1069     case T('h'):
1070       usage(argv[0], XMLWF_EXIT_SUCCESS);
1071       // usage called exit(..), never gets here
1072     case T('v'):
1073       showVersion(argv[0]);
1074       return XMLWF_EXIT_SUCCESS;
1075     case T('g'): {
1076       const XML_Char *valueText = NULL;
1077       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1078 
1079       errno = 0;
1080       XML_Char *afterValueText = (XML_Char *)valueText;
1081       const long long read_size_bytes_candidate
1082           = tcstoull(valueText, &afterValueText, 10);
1083       if ((errno != 0) || (afterValueText[0] != T('\0'))
1084           || (read_size_bytes_candidate < 1)
1085           || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
1086         // This prevents tperror(..) from reporting misleading "[..]: Success"
1087         errno = ERANGE;
1088         tperror(T("invalid buffer size") T(
1089             " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
1090         exit(XMLWF_EXIT_USAGE_ERROR);
1091       }
1092       g_read_size_bytes = (int)read_size_bytes_candidate;
1093       break;
1094     }
1095     case T('k'):
1096       continueOnError = 1;
1097       j++;
1098       break;
1099     case T('a'): {
1100       const XML_Char *valueText = NULL;
1101       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1102 
1103       errno = 0;
1104       XML_Char *afterValueText = NULL;
1105       attackMaximumAmplification = tcstof(valueText, &afterValueText);
1106       if ((errno != 0) || (afterValueText[0] != T('\0'))
1107           || isnan(attackMaximumAmplification)
1108           || (attackMaximumAmplification < 1.0f)) {
1109         // This prevents tperror(..) from reporting misleading "[..]: Success"
1110         errno = ERANGE;
1111         tperror(T("invalid amplification limit") T(
1112             " (needs a floating point number greater or equal than 1.0)"));
1113         exit(XMLWF_EXIT_USAGE_ERROR);
1114       }
1115 #if XML_GE == 0
1116       ftprintf(stderr,
1117                T("Warning: Given amplification limit ignored")
1118                    T(", xmlwf has been compiled without DTD/GE support.\n"));
1119 #endif
1120       break;
1121     }
1122     case T('b'): {
1123       const XML_Char *valueText = NULL;
1124       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1125 
1126       errno = 0;
1127       XML_Char *afterValueText = (XML_Char *)valueText;
1128       attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
1129       if ((errno != 0) || (afterValueText[0] != T('\0'))) {
1130         // This prevents tperror(..) from reporting misleading "[..]: Success"
1131         errno = ERANGE;
1132         tperror(T("invalid ignore threshold")
1133                     T(" (needs an integer from 0 to 2^64-1)"));
1134         exit(XMLWF_EXIT_USAGE_ERROR);
1135       }
1136       attackThresholdGiven = XML_TRUE;
1137 #if XML_GE == 0
1138       ftprintf(stderr,
1139                T("Warning: Given attack threshold ignored")
1140                    T(", xmlwf has been compiled without DTD/GE support.\n"));
1141 #endif
1142       break;
1143     }
1144     case T('q'): {
1145       disableDeferral = XML_TRUE;
1146       j++;
1147       break;
1148     }
1149     case T('\0'):
1150       if (j > 1) {
1151         i++;
1152         j = 0;
1153         break;
1154       }
1155       /* fall through */
1156     default:
1157       usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
1158       // usage called exit(..), never gets here
1159     }
1160   }
1161   if (i == argc) {
1162     useStdin = 1;
1163     processFlags &= ~XML_MAP_FILE;
1164     i--;
1165   }
1166   for (; i < argc; i++) {
1167     XML_Char *outName = 0;
1168     int result;
1169     XML_Parser parser;
1170     if (useNamespaces)
1171       parser = XML_ParserCreateNS(encoding, NSSEP);
1172     else
1173       parser = XML_ParserCreate(encoding);
1174 
1175     if (! parser) {
1176       tperror(T("Could not instantiate parser"));
1177       exit(XMLWF_EXIT_INTERNAL_ERROR);
1178     }
1179 
1180     if (attackMaximumAmplification != -1.0f) {
1181 #if XML_GE == 1
1182       XML_SetBillionLaughsAttackProtectionMaximumAmplification(
1183           parser, attackMaximumAmplification);
1184       XML_SetAllocTrackerMaximumAmplification(parser,
1185                                               attackMaximumAmplification);
1186 #endif
1187     }
1188     if (attackThresholdGiven) {
1189 #if XML_GE == 1
1190       XML_SetBillionLaughsAttackProtectionActivationThreshold(
1191           parser, attackThresholdBytes);
1192       XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes);
1193 #else
1194       (void)attackThresholdBytes; // silence -Wunused-but-set-variable
1195 #endif
1196     }
1197 
1198     if (disableDeferral) {
1199       const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
1200       if (! success) {
1201         // This prevents tperror(..) from reporting misleading "[..]: Success"
1202         errno = EINVAL;
1203         tperror(T("Failed to disable reparse deferral"));
1204         exit(XMLWF_EXIT_INTERNAL_ERROR);
1205       }
1206     }
1207 
1208     if (requireStandalone)
1209       XML_SetNotStandaloneHandler(parser, notStandalone);
1210     XML_SetParamEntityParsing(parser, paramEntityParsing);
1211     if (outputType == 't') {
1212       /* This is for doing timings; this gives a more realistic estimate of
1213          the parsing time. */
1214       outputDir = 0;
1215       XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1216       XML_SetCharacterDataHandler(parser, nopCharacterData);
1217       XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1218     } else if (outputDir) {
1219       const XML_Char *delim = T("/");
1220       const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1221       if (! useStdin) {
1222         /* Jump after last (back)slash */
1223         const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1224         if (lastDelim)
1225           file = lastDelim + 1;
1226 #if defined(_WIN32)
1227         else {
1228           const XML_Char *winDelim = T("\\");
1229           lastDelim = tcsrchr(file, winDelim[0]);
1230           if (lastDelim) {
1231             file = lastDelim + 1;
1232             delim = winDelim;
1233           }
1234         }
1235 #endif
1236       }
1237       outName
1238           = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char));
1239       if (! outName) {
1240         tperror(T("Could not allocate memory"));
1241         exit(XMLWF_EXIT_INTERNAL_ERROR);
1242       }
1243       tcscpy(outName, outputDir);
1244       tcscat(outName, delim);
1245       tcscat(outName, file);
1246       userData.fp = tfopen(outName, T("wb"));
1247       if (! userData.fp) {
1248         tperror(outName);
1249         exitCode = XMLWF_EXIT_OUTPUT_ERROR;
1250         free(outName);
1251         XML_ParserFree(parser);
1252         if (continueOnError) {
1253           continue;
1254         } else {
1255           break;
1256         }
1257       }
1258       setvbuf(userData.fp, NULL, _IOFBF, 16384);
1259 #ifdef XML_UNICODE
1260       puttc(0xFEFF, userData.fp);
1261 #endif
1262       XML_SetUserData(parser, &userData);
1263       switch (outputType) {
1264       case 'm':
1265         XML_UseParserAsHandlerArg(parser);
1266         XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1267         XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1268         XML_SetCommentHandler(parser, metaComment);
1269         XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1270                                    metaEndCdataSection);
1271         XML_SetCharacterDataHandler(parser, metaCharacterData);
1272         XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1273                                   metaEndDoctypeDecl);
1274         XML_SetEntityDeclHandler(parser, metaEntityDecl);
1275         XML_SetNotationDeclHandler(parser, metaNotationDecl);
1276         XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1277                                     metaEndNamespaceDecl);
1278         metaStartDocument(parser);
1279         break;
1280       case 'c':
1281         XML_UseParserAsHandlerArg(parser);
1282         XML_SetDefaultHandler(parser, markup);
1283         XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1284         XML_SetCharacterDataHandler(parser, defaultCharacterData);
1285         XML_SetProcessingInstructionHandler(parser,
1286                                             defaultProcessingInstruction);
1287         break;
1288       default:
1289         if (useNamespaces)
1290           XML_SetElementHandler(parser, startElementNS, endElementNS);
1291         else
1292           XML_SetElementHandler(parser, startElement, endElement);
1293         XML_SetCharacterDataHandler(parser, characterData);
1294 #ifndef W3C14N
1295         XML_SetProcessingInstructionHandler(parser, processingInstruction);
1296         if (requiresNotations) {
1297           XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1298           XML_SetNotationDeclHandler(parser, notationDecl);
1299         }
1300 #endif /* not W3C14N */
1301         break;
1302       }
1303     }
1304     if (windowsCodePages)
1305       XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1306     result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1307     if (outputDir) {
1308       if (outputType == 'm')
1309         metaEndDocument(parser);
1310       fclose(userData.fp);
1311       if (! result) {
1312         tremove(outName);
1313       }
1314       free(outName);
1315     }
1316     XML_ParserFree(parser);
1317     if (! result) {
1318       exitCode = XMLWF_EXIT_NOT_WELLFORMED;
1319       cleanupUserData(&userData);
1320       if (! continueOnError) {
1321         break;
1322       }
1323     }
1324   }
1325   return exitCode;
1326 }
1327