xref: /freebsd/contrib/expat/xmlwf/xmlwf.c (revision c82aeee8a6d39371006f5eeb1b51704e7b97e2b7)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14    Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Copyright (c) 2020      Joe Orton <jorton@redhat.com>
18    Copyright (c) 2020      Kleber Tarcísio <klebertarcisio@yahoo.com.br>
19    Copyright (c) 2021      Tim Bray <tbray@textuality.com>
20    Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
21    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
22    Copyright (c) 2025      Alfonso Gregory <gfunni234@gmail.com>
23    Copyright (c) 2026      Matthew Fernandez <matthew.fernandez@gmail.com>
24    Copyright (c) 2026      Nick Begg <nick@stunttruck.net>
25    Copyright (c) 2026      Kartik Kenchi <netliomax25@gmail.com>
26    Licensed under the MIT license:
27 
28    Permission is  hereby granted,  free of charge,  to any  person obtaining
29    a  copy  of  this  software   and  associated  documentation  files  (the
30    "Software"),  to  deal in  the  Software  without restriction,  including
31    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
32    distribute, sublicense, and/or sell copies of the Software, and to permit
33    persons  to whom  the Software  is  furnished to  do so,  subject to  the
34    following conditions:
35 
36    The above copyright  notice and this permission notice  shall be included
37    in all copies or substantial portions of the Software.
38 
39    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
40    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
41    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
42    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
43    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
44    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
45    USE OR OTHER DEALINGS IN THE SOFTWARE.
46 */
47 
48 #include "expat_config.h"
49 
50 #include <assert.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <stddef.h>
54 #include <string.h>
55 #include <math.h> /* for isnan */
56 #include <errno.h>
57 
58 #include "expat.h"
59 #include "codepage.h"
60 #include "internal.h" /* for UNUSED_P only */
61 #include "fallthrough.h"
62 #include "xmlfile.h"
63 #include "xmltchar.h"
64 
65 #ifdef _MSC_VER
66 #  include <crtdbg.h>
67 #endif
68 
69 #ifdef XML_UNICODE
70 #  include <wchar.h>
71 #endif
72 
73 #include "../lib/xcsinc.c"
74 
/* Process exit codes used by xmlwf.  The meanings noted here follow the
   "exit status" section of the help text printed by usage(). */
enum ExitCode {
  XMLWF_EXIT_SUCCESS = 0,        /* input well-formed, output written */
  XMLWF_EXIT_INTERNAL_ERROR = 1, /* could not allocate data structures */
  XMLWF_EXIT_NOT_WELLFORMED = 2, /* one or more inputs not well-formed */
  XMLWF_EXIT_OUTPUT_ERROR = 3,   /* could not create an output file */
  XMLWF_EXIT_USAGE_ERROR = 4,    /* command-line argument error */
};
82 
83 /* Structures for handler user data */
/* One stored NOTATION declaration, kept in a singly linked list.
   notationDecl() fills the strings with xcsdup() copies and freeNotations()
   releases them together with the node. */
typedef struct NotationList {
  struct NotationList *next;    /* next node; NULL terminates the list */
  const XML_Char *notationName; /* copy of the notation name */
  const XML_Char *systemId;     /* copy of the system id, or NULL */
  const XML_Char *publicId;     /* copy of the public id, or NULL */
} NotationList;
90 
/* State shared by the output handlers in this file. */
typedef struct xmlwfUserData {
  FILE *fp;                           /* stream the handlers write to */
  NotationList *notationListHead;     /* stored notations; NULL when empty */
  const XML_Char *currentDoctypeName; /* xcsdup() copy made by
                                         startDoctypeDecl(), or NULL */
} XmlwfUserData;
96 
/* Separator character that the namespace-aware handlers look for inside
   element and attribute names: the text before it is written as the
   namespace URI and the text after it as the local name.
   This ensures proper sorting. */

#define NSSEP T('\001')
100 
101 static void XMLCALL
102 characterData(void *userData, const XML_Char *s, int len) {
103   FILE *fp = ((XmlwfUserData *)userData)->fp;
104   for (; len > 0; --len, ++s) {
105     switch (*s) {
106     case T('&'):
107       fputts(T("&amp;"), fp);
108       break;
109     case T('<'):
110       fputts(T("&lt;"), fp);
111       break;
112     case T('>'):
113       fputts(T("&gt;"), fp);
114       break;
115 #ifdef W3C14N
116     case 13:
117       fputts(T("&#xD;"), fp);
118       break;
119 #else
120     case T('"'):
121       fputts(T("&quot;"), fp);
122       break;
123     case 9:
124     case 10:
125     case 13:
126       ftprintf(fp, T("&#%d;"), *s);
127       break;
128 #endif
129     default:
130       puttc(*s, fp);
131       break;
132     }
133   }
134 }
135 
136 static void
137 attributeValue(FILE *fp, const XML_Char *s) {
138   puttc(T('='), fp);
139   puttc(T('"'), fp);
140   assert(s);
141   for (;;) {
142     switch (*s) {
143     case 0:
144     case NSSEP:
145       puttc(T('"'), fp);
146       return;
147     case T('&'):
148       fputts(T("&amp;"), fp);
149       break;
150     case T('<'):
151       fputts(T("&lt;"), fp);
152       break;
153     case T('"'):
154       fputts(T("&quot;"), fp);
155       break;
156 #ifdef W3C14N
157     case 9:
158       fputts(T("&#x9;"), fp);
159       break;
160     case 10:
161       fputts(T("&#xA;"), fp);
162       break;
163     case 13:
164       fputts(T("&#xD;"), fp);
165       break;
166 #else
167     case T('>'):
168       fputts(T("&gt;"), fp);
169       break;
170     case 9:
171     case 10:
172     case 13:
173       ftprintf(fp, T("&#%d;"), *s);
174       break;
175 #endif
176     default:
177       puttc(*s, fp);
178       break;
179     }
180     s++;
181   }
182 }
183 
184 /* Lexicographically comparing UTF-8 encoded attribute values,
185 is equivalent to lexicographically comparing based on the character number. */
186 
187 static int
188 attcmp(const void *att1, const void *att2) {
189   return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
190 }
191 
192 static void XMLCALL
193 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
194   int nAtts;
195   const XML_Char **p;
196   FILE *fp = ((XmlwfUserData *)userData)->fp;
197   puttc(T('<'), fp);
198   fputts(name, fp);
199 
200   p = atts;
201   while (*p)
202     ++p;
203   nAtts = (int)((p - atts) >> 1);
204   if (nAtts > 1)
205     qsort(atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
206   while (*atts) {
207     puttc(T(' '), fp);
208     fputts(*atts++, fp);
209     attributeValue(fp, *atts);
210     atts++;
211   }
212   puttc(T('>'), fp);
213 }
214 
215 static void XMLCALL
216 endElement(void *userData, const XML_Char *name) {
217   FILE *fp = ((XmlwfUserData *)userData)->fp;
218   puttc(T('<'), fp);
219   puttc(T('/'), fp);
220   fputts(name, fp);
221   puttc(T('>'), fp);
222 }
223 
224 static int
225 nsattcmp(const void *p1, const void *p2) {
226   const XML_Char *att1 = *(const XML_Char *const *)p1;
227   const XML_Char *att2 = *(const XML_Char *const *)p2;
228   int sep1 = (tcsrchr(att1, NSSEP) != 0);
229   int sep2 = (tcsrchr(att2, NSSEP) != 0);
230   if (sep1 != sep2)
231     return sep1 - sep2;
232   return tcscmp(att1, att2);
233 }
234 
235 static void XMLCALL
236 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
237   int nAtts;
238   int nsi;
239   const XML_Char **p;
240   FILE *fp = ((XmlwfUserData *)userData)->fp;
241   const XML_Char *sep;
242   puttc(T('<'), fp);
243 
244   sep = tcsrchr(name, NSSEP);
245   if (sep) {
246     fputts(T("n1:"), fp);
247     fputts(sep + 1, fp);
248     fputts(T(" xmlns:n1"), fp);
249     attributeValue(fp, name);
250     nsi = 2;
251   } else {
252     fputts(name, fp);
253     nsi = 1;
254   }
255 
256   p = atts;
257   while (*p)
258     ++p;
259   nAtts = (int)((p - atts) >> 1);
260   if (nAtts > 1)
261     qsort(atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
262   while (*atts) {
263     name = *atts++;
264     sep = tcsrchr(name, NSSEP);
265     puttc(T(' '), fp);
266     if (sep) {
267       ftprintf(fp, T("n%d:"), nsi);
268       fputts(sep + 1, fp);
269     } else
270       fputts(name, fp);
271     attributeValue(fp, *atts);
272     if (sep) {
273       ftprintf(fp, T(" xmlns:n%d"), nsi++);
274       attributeValue(fp, name);
275     }
276     atts++;
277   }
278   puttc(T('>'), fp);
279 }
280 
281 static void XMLCALL
282 endElementNS(void *userData, const XML_Char *name) {
283   FILE *fp = ((XmlwfUserData *)userData)->fp;
284   const XML_Char *sep;
285   puttc(T('<'), fp);
286   puttc(T('/'), fp);
287   sep = tcsrchr(name, NSSEP);
288   if (sep) {
289     fputts(T("n1:"), fp);
290     fputts(sep + 1, fp);
291   } else
292     fputts(name, fp);
293   puttc(T('>'), fp);
294 }
295 
296 #ifndef W3C14N
297 
298 static void XMLCALL
299 processingInstruction(void *userData, const XML_Char *target,
300                       const XML_Char *data) {
301   FILE *fp = ((XmlwfUserData *)userData)->fp;
302   puttc(T('<'), fp);
303   puttc(T('?'), fp);
304   fputts(target, fp);
305   puttc(T(' '), fp);
306   fputts(data, fp);
307   puttc(T('?'), fp);
308   puttc(T('>'), fp);
309 }
310 
311 static XML_Char *
312 xcsdup(const XML_Char *s) {
313   const size_t count = xcslen(s) + /* null terminator */ 1;
314 
315   // Detect and prevent integer overflow
316   if (count > SIZE_MAX / sizeof(XML_Char))
317     return NULL;
318 
319   const size_t numBytes = count * sizeof(XML_Char);
320   XML_Char *const result = malloc(numBytes);
321   if (result == NULL)
322     return NULL;
323   memcpy(result, s, numBytes);
324   return result;
325 }
326 
327 static void XMLCALL
328 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
329                  const XML_Char *sysid, const XML_Char *publid,
330                  int has_internal_subset) {
331   XmlwfUserData *data = userData;
332   UNUSED_P(sysid);
333   UNUSED_P(publid);
334   UNUSED_P(has_internal_subset);
335   data->currentDoctypeName = xcsdup(doctypeName);
336 }
337 
338 static void
339 freeNotations(XmlwfUserData *data) {
340   NotationList *notationListHead = data->notationListHead;
341 
342   while (notationListHead != NULL) {
343     NotationList *next = notationListHead->next;
344     free((void *)notationListHead->notationName);
345     free((void *)notationListHead->systemId);
346     free((void *)notationListHead->publicId);
347     free(notationListHead);
348     notationListHead = next;
349   }
350   data->notationListHead = NULL;
351 }
352 
353 static void
354 cleanupUserData(XmlwfUserData *userData) {
355   free((void *)userData->currentDoctypeName);
356   userData->currentDoctypeName = NULL;
357   freeNotations(userData);
358 }
359 
360 static int
361 xcscmp(const XML_Char *xs, const XML_Char *xt) {
362   while (*xs != 0 && *xt != 0) {
363     if (*xs < *xt)
364       return -1;
365     if (*xs > *xt)
366       return 1;
367     xs++;
368     xt++;
369   }
370   if (*xs < *xt)
371     return -1;
372   if (*xs > *xt)
373     return 1;
374   return 0;
375 }
376 
377 static int
378 notationCmp(const void *a, const void *b) {
379   const NotationList *const n1 = *(const NotationList *const *)a;
380   const NotationList *const n2 = *(const NotationList *const *)b;
381 
382   return xcscmp(n1->notationName, n2->notationName);
383 }
384 
385 /* Write a SystemLiteral/PubidLiteral, choosing a delimiter that does not
386    occur in the value.  The grammar forbids a literal from containing its
387    own delimiter, so a value reported by Expat never holds both quote
388    characters and a safe delimiter always exists. */
389 static void
390 writeLiteral(FILE *fp, const XML_Char *value) {
391   const XML_Char quote = (tcschr(value, T('\'')) != NULL) ? T('"') : T('\'');
392   puttc(quote, fp);
393   fputts(value, fp);
394   puttc(quote, fp);
395 }
396 
397 static void XMLCALL
398 endDoctypeDecl(void *userData) {
399   XmlwfUserData *data = userData;
400   NotationList **notations;
401   size_t notationCount = 0;
402   NotationList *p;
403   size_t i;
404 
405   /* How many notations do we have? */
406   for (p = data->notationListHead; p != NULL; p = p->next)
407     notationCount++;
408   if (notationCount == 0) {
409     /* Nothing to report */
410     goto cleanUp;
411   }
412 
413   /* Detect and prevent integer overflow in the multiplication, mirroring
414      the guards in xcsdup() and resolveSystemId() */
415   if (notationCount > SIZE_MAX / sizeof(NotationList *)) {
416     fprintf(stderr, "Unable to sort notations");
417     goto cleanUp;
418   }
419 
420   notations = malloc(notationCount * sizeof(NotationList *));
421   if (notations == NULL) {
422     fprintf(stderr, "Unable to sort notations");
423     goto cleanUp;
424   }
425 
426   for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
427     notations[i] = p;
428   }
429   qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
430 
431   /* Output the DOCTYPE header */
432   fputts(T("<!DOCTYPE "), data->fp);
433   fputts(data->currentDoctypeName, data->fp);
434   fputts(T(" [\n"), data->fp);
435 
436   /* Now the NOTATIONs */
437   for (i = 0; i < notationCount; i++) {
438     fputts(T("<!NOTATION "), data->fp);
439     fputts(notations[i]->notationName, data->fp);
440     if (notations[i]->publicId != NULL) {
441       fputts(T(" PUBLIC "), data->fp);
442       writeLiteral(data->fp, notations[i]->publicId);
443       if (notations[i]->systemId != NULL) {
444         puttc(T(' '), data->fp);
445         writeLiteral(data->fp, notations[i]->systemId);
446       }
447     } else if (notations[i]->systemId != NULL) {
448       fputts(T(" SYSTEM "), data->fp);
449       writeLiteral(data->fp, notations[i]->systemId);
450     }
451     puttc(T('>'), data->fp);
452     puttc(T('\n'), data->fp);
453   }
454 
455   /* Finally end the DOCTYPE */
456   fputts(T("]>\n"), data->fp);
457 
458   free(notations);
459 
460 cleanUp:
461   freeNotations(data);
462   free((void *)data->currentDoctypeName);
463   data->currentDoctypeName = NULL;
464 }
465 
466 static void XMLCALL
467 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
468              const XML_Char *systemId, const XML_Char *publicId) {
469   XmlwfUserData *data = userData;
470   NotationList *entry = malloc(sizeof(NotationList));
471   const char *errorMessage = "Unable to store NOTATION for output\n";
472 
473   UNUSED_P(base);
474   if (entry == NULL) {
475     fputs(errorMessage, stderr);
476     return; /* Nothing we can really do about this */
477   }
478   entry->notationName = xcsdup(notationName);
479   if (entry->notationName == NULL) {
480     fputs(errorMessage, stderr);
481     free(entry);
482     return;
483   }
484   if (systemId != NULL) {
485     entry->systemId = xcsdup(systemId);
486     if (entry->systemId == NULL) {
487       fputs(errorMessage, stderr);
488       free((void *)entry->notationName);
489       free(entry);
490       return;
491     }
492   } else {
493     entry->systemId = NULL;
494   }
495   if (publicId != NULL) {
496     entry->publicId = xcsdup(publicId);
497     if (entry->publicId == NULL) {
498       fputs(errorMessage, stderr);
499       free((void *)entry->systemId); /* Safe if it's NULL */
500       free((void *)entry->notationName);
501       free(entry);
502       return;
503     }
504   } else {
505     entry->publicId = NULL;
506   }
507 
508   entry->next = data->notationListHead;
509   data->notationListHead = entry;
510 }
511 
512 #endif /* not W3C14N */
513 
514 static void XMLCALL
515 defaultCharacterData(void *userData, const XML_Char *s, int len) {
516   UNUSED_P(s);
517   UNUSED_P(len);
518   XML_DefaultCurrent(userData);
519 }
520 
521 static void XMLCALL
522 defaultStartElement(void *userData, const XML_Char *name,
523                     const XML_Char **atts) {
524   UNUSED_P(name);
525   UNUSED_P(atts);
526   XML_DefaultCurrent(userData);
527 }
528 
529 static void XMLCALL
530 defaultEndElement(void *userData, const XML_Char *name) {
531   UNUSED_P(name);
532   XML_DefaultCurrent(userData);
533 }
534 
535 static void XMLCALL
536 defaultProcessingInstruction(void *userData, const XML_Char *target,
537                              const XML_Char *data) {
538   UNUSED_P(target);
539   UNUSED_P(data);
540   XML_DefaultCurrent(userData);
541 }
542 
543 static void XMLCALL
544 nopCharacterData(void *userData, const XML_Char *s, int len) {
545   UNUSED_P(userData);
546   UNUSED_P(s);
547   UNUSED_P(len);
548 }
549 
550 static void XMLCALL
551 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
552   UNUSED_P(userData);
553   UNUSED_P(name);
554   UNUSED_P(atts);
555 }
556 
557 static void XMLCALL
558 nopEndElement(void *userData, const XML_Char *name) {
559   UNUSED_P(userData);
560   UNUSED_P(name);
561 }
562 
563 static void XMLCALL
564 nopProcessingInstruction(void *userData, const XML_Char *target,
565                          const XML_Char *data) {
566   UNUSED_P(userData);
567   UNUSED_P(target);
568   UNUSED_P(data);
569 }
570 
571 static void XMLCALL
572 markup(void *userData, const XML_Char *s, int len) {
573   FILE *fp = ((XmlwfUserData *)XML_GetUserData(userData))->fp;
574   for (; len > 0; --len, ++s)
575     puttc(*s, fp);
576 }
577 
578 static void
579 metaLocation(XML_Parser parser) {
580   const XML_Char *uri = XML_GetBase(parser);
581   FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
582   if (uri) {
583     fputts(T(" uri=\""), fp);
584     characterData(XML_GetUserData(parser), uri, (int)tcslen(uri));
585     puttc(T('"'), fp);
586   }
587   ftprintf(fp,
588            T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
589                T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
590                    T(XML_FMT_INT_MOD) T("u\""),
591            XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
592            XML_GetCurrentLineNumber(parser),
593            XML_GetCurrentColumnNumber(parser));
594 }
595 
596 static void
597 metaStartDocument(void *userData) {
598   fputts(T("<document>\n"), ((XmlwfUserData *)XML_GetUserData(userData))->fp);
599 }
600 
601 static void
602 metaEndDocument(void *userData) {
603   fputts(T("</document>\n"), ((XmlwfUserData *)XML_GetUserData(userData))->fp);
604 }
605 
606 static void XMLCALL
607 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
608   XML_Parser parser = userData;
609   XmlwfUserData *data = XML_GetUserData(parser);
610   FILE *fp = data->fp;
611   const XML_Char **specifiedAttsEnd
612       = atts + XML_GetSpecifiedAttributeCount(parser);
613   const XML_Char **idAttPtr;
614   int idAttIndex = XML_GetIdAttributeIndex(parser);
615   if (idAttIndex < 0)
616     idAttPtr = 0;
617   else
618     idAttPtr = atts + idAttIndex;
619 
620   fputts(T("<starttag name=\""), fp);
621   characterData(data, name, (int)tcslen(name));
622   puttc(T('"'), fp);
623   metaLocation(parser);
624   if (*atts) {
625     fputts(T(">\n"), fp);
626     do {
627       fputts(T("<attribute name=\""), fp);
628       characterData(data, atts[0], (int)tcslen(atts[0]));
629       fputts(T("\" value=\""), fp);
630       characterData(data, atts[1], (int)tcslen(atts[1]));
631       if (atts >= specifiedAttsEnd)
632         fputts(T("\" defaulted=\"yes\"/>\n"), fp);
633       else if (atts == idAttPtr)
634         fputts(T("\" id=\"yes\"/>\n"), fp);
635       else
636         fputts(T("\"/>\n"), fp);
637     } while (*(atts += 2));
638     fputts(T("</starttag>\n"), fp);
639   } else
640     fputts(T("/>\n"), fp);
641 }
642 
643 static void XMLCALL
644 metaEndElement(void *userData, const XML_Char *name) {
645   XML_Parser parser = userData;
646   XmlwfUserData *data = XML_GetUserData(parser);
647   FILE *fp = data->fp;
648   fputts(T("<endtag name=\""), fp);
649   characterData(data, name, (int)tcslen(name));
650   puttc(T('"'), fp);
651   metaLocation(parser);
652   fputts(T("/>\n"), fp);
653 }
654 
655 static void XMLCALL
656 metaProcessingInstruction(void *userData, const XML_Char *target,
657                           const XML_Char *data) {
658   XML_Parser parser = userData;
659   XmlwfUserData *usrData = XML_GetUserData(parser);
660   FILE *fp = usrData->fp;
661   ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
662   characterData(usrData, data, (int)tcslen(data));
663   puttc(T('"'), fp);
664   metaLocation(parser);
665   fputts(T("/>\n"), fp);
666 }
667 
668 static void XMLCALL
669 metaComment(void *userData, const XML_Char *data) {
670   XML_Parser parser = userData;
671   XmlwfUserData *usrData = XML_GetUserData(parser);
672   FILE *fp = usrData->fp;
673   fputts(T("<comment data=\""), fp);
674   characterData(usrData, data, (int)tcslen(data));
675   puttc(T('"'), fp);
676   metaLocation(parser);
677   fputts(T("/>\n"), fp);
678 }
679 
680 static void XMLCALL
681 metaStartCdataSection(void *userData) {
682   XML_Parser parser = userData;
683   XmlwfUserData *data = XML_GetUserData(parser);
684   FILE *fp = data->fp;
685   fputts(T("<startcdata"), fp);
686   metaLocation(parser);
687   fputts(T("/>\n"), fp);
688 }
689 
690 static void XMLCALL
691 metaEndCdataSection(void *userData) {
692   XML_Parser parser = userData;
693   XmlwfUserData *data = XML_GetUserData(parser);
694   FILE *fp = data->fp;
695   fputts(T("<endcdata"), fp);
696   metaLocation(parser);
697   fputts(T("/>\n"), fp);
698 }
699 
700 static void XMLCALL
701 metaCharacterData(void *userData, const XML_Char *s, int len) {
702   XML_Parser parser = userData;
703   XmlwfUserData *data = XML_GetUserData(parser);
704   FILE *fp = data->fp;
705   fputts(T("<chars str=\""), fp);
706   characterData(data, s, len);
707   puttc(T('"'), fp);
708   metaLocation(parser);
709   fputts(T("/>\n"), fp);
710 }
711 
712 static void XMLCALL
713 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
714                      const XML_Char *sysid, const XML_Char *pubid,
715                      int has_internal_subset) {
716   XML_Parser parser = userData;
717   XmlwfUserData *data = XML_GetUserData(parser);
718   FILE *fp = data->fp;
719   UNUSED_P(sysid);
720   UNUSED_P(pubid);
721   UNUSED_P(has_internal_subset);
722   ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
723   metaLocation(parser);
724   fputts(T("/>\n"), fp);
725 }
726 
727 static void XMLCALL
728 metaEndDoctypeDecl(void *userData) {
729   XML_Parser parser = userData;
730   XmlwfUserData *data = XML_GetUserData(parser);
731   FILE *fp = data->fp;
732   fputts(T("<enddoctype"), fp);
733   metaLocation(parser);
734   fputts(T("/>\n"), fp);
735 }
736 
737 static void XMLCALL
738 metaNotationDecl(void *userData, const XML_Char *notationName,
739                  const XML_Char *base, const XML_Char *systemId,
740                  const XML_Char *publicId) {
741   XML_Parser parser = userData;
742   XmlwfUserData *data = XML_GetUserData(parser);
743   FILE *fp = data->fp;
744   UNUSED_P(base);
745   ftprintf(fp, T("<notation name=\"%s\""), notationName);
746   if (publicId)
747     ftprintf(fp, T(" public=\"%s\""), publicId);
748   if (systemId) {
749     fputts(T(" system=\""), fp);
750     characterData(data, systemId, (int)tcslen(systemId));
751     puttc(T('"'), fp);
752   }
753   metaLocation(parser);
754   fputts(T("/>\n"), fp);
755 }
756 
757 static void XMLCALL
758 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
759                const XML_Char *value, int value_length, const XML_Char *base,
760                const XML_Char *systemId, const XML_Char *publicId,
761                const XML_Char *notationName) {
762   XML_Parser parser = userData;
763   XmlwfUserData *data = XML_GetUserData(parser);
764   FILE *fp = data->fp;
765 
766   UNUSED_P(is_param);
767   UNUSED_P(base);
768   if (value) {
769     ftprintf(fp, T("<entity name=\"%s\""), entityName);
770     metaLocation(parser);
771     puttc(T('>'), fp);
772     characterData(data, value, value_length);
773     fputts(T("</entity/>\n"), fp);
774   } else if (notationName) {
775     ftprintf(fp, T("<entity name=\"%s\""), entityName);
776     if (publicId)
777       ftprintf(fp, T(" public=\"%s\""), publicId);
778     fputts(T(" system=\""), fp);
779     characterData(data, systemId, (int)tcslen(systemId));
780     puttc(T('"'), fp);
781     ftprintf(fp, T(" notation=\"%s\""), notationName);
782     metaLocation(parser);
783     fputts(T("/>\n"), fp);
784   } else {
785     ftprintf(fp, T("<entity name=\"%s\""), entityName);
786     if (publicId)
787       ftprintf(fp, T(" public=\"%s\""), publicId);
788     fputts(T(" system=\""), fp);
789     characterData(data, systemId, (int)tcslen(systemId));
790     puttc(T('"'), fp);
791     metaLocation(parser);
792     fputts(T("/>\n"), fp);
793   }
794 }
795 
796 static void XMLCALL
797 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
798                        const XML_Char *uri) {
799   XML_Parser parser = userData;
800   XmlwfUserData *data = XML_GetUserData(parser);
801   FILE *fp = data->fp;
802   fputts(T("<startns"), fp);
803   if (prefix)
804     ftprintf(fp, T(" prefix=\"%s\""), prefix);
805   if (uri) {
806     fputts(T(" ns=\""), fp);
807     characterData(data, uri, (int)tcslen(uri));
808     fputts(T("\"/>\n"), fp);
809   } else
810     fputts(T("/>\n"), fp);
811 }
812 
813 static void XMLCALL
814 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
815   XML_Parser parser = userData;
816   XmlwfUserData *data = XML_GetUserData(parser);
817   FILE *fp = data->fp;
818   if (! prefix)
819     fputts(T("<endns/>\n"), fp);
820   else
821     ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
822 }
823 
824 static int XMLCALL
825 unknownEncodingConvert(void *data, const char *p) {
826   return codepageConvert(*(int *)data, p);
827 }
828 
829 static int XMLCALL
830 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
831   int cp;
832   static const XML_Char prefixL[] = T("windows-");
833   static const XML_Char prefixU[] = T("WINDOWS-");
834   int i;
835 
836   UNUSED_P(userData);
837   for (i = 0; prefixU[i]; i++)
838     if (name[i] != prefixU[i] && name[i] != prefixL[i])
839       return 0;
840 
841   cp = 0;
842   for (; name[i]; i++) {
843     static const XML_Char digits[] = T("0123456789");
844     const XML_Char *s = tcschr(digits, name[i]);
845     if (! s)
846       return 0;
847     cp *= 10;
848     cp += (int)(s - digits);
849     if (cp >= 0x10000)
850       return 0;
851   }
852   if (! codepageMap(cp, info->map))
853     return 0;
854   info->convert = unknownEncodingConvert;
855   /* We could just cast the code page integer to a void *,
856   and avoid the use of release. */
857   info->release = free;
858   info->data = malloc(sizeof(int));
859   if (! info->data)
860     return 0;
861   *(int *)info->data = cp;
862   return 1;
863 }
864 
865 static int XMLCALL
866 notStandalone(void *userData) {
867   UNUSED_P(userData);
868   return 0;
869 }
870 
871 static void
872 showVersion(XML_Char *prog) {
873   XML_Char *s = prog;
874   XML_Char ch;
875   const XML_Feature *features = XML_GetFeatureList();
876   while ((ch = *s) != 0) {
877     if (ch == '/'
878 #if defined(_WIN32)
879         || ch == '\\'
880 #endif
881     )
882       prog = s + 1;
883     ++s;
884   }
885   ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
886   if (features != NULL && features[0].feature != XML_FEATURE_END) {
887     int i = 1;
888     ftprintf(stdout, T("%s"), features[0].name);
889     if (features[0].value)
890       ftprintf(stdout, T("=%ld"), features[0].value);
891     while (features[i].feature != XML_FEATURE_END) {
892       ftprintf(stdout, T(", %s"), features[i].name);
893       if (features[i].value)
894         ftprintf(stdout, T("=%ld"), features[i].value);
895       ++i;
896     }
897     ftprintf(stdout, T("\n"));
898   }
899 }
900 
/* Prints the command-line help text to stderr, substituting prog into the
   three usage lines, and then terminates the process with exit code rc.
   This function does not return. */
#if defined(__GNUC__)
__attribute__((noreturn))
#endif
static void
usage(const XML_Char *prog, int rc) {
  ftprintf(
      stderr,
      /* Generated with:
       * $ xmlwf/xmlwf_helpgen.sh
       * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
       * xmlwf/xmlwf_helpgen.sh in here.
       */
      /* clang-format off */
      T("usage:\n")
      T("  %s [OPTIONS] [FILE ...]\n")
      T("  %s -h|--help\n")
      T("  %s -v|--version\n")
      T("\n")
      T("xmlwf - Determines if an XML document is well-formed\n")
      T("\n")
      T("positional arguments:\n")
      T("  FILE           file to process (default: STDIN)\n")
      T("\n")
      T("input control arguments:\n")
      T("  -s             print an error if the document is not [s]tandalone\n")
      T("  -n             enable [n]amespace processing\n")
      T("  -p             enable processing of external DTDs and [p]arameter entities\n")
      T("  -x             enable processing of e[x]ternal entities\n")
      T("                 (CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).)\n")
      T("  -e ENCODING    override any in-document [e]ncoding declaration\n")
      T("  -w             enable support for [W]indows code pages\n")
      T("  -r             disable memory-mapping and use [r]ead calls instead\n")
      T("  -g BYTES       buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
      T("  -k             when processing multiple files, [k]eep processing after first file with error\n")
      T("\n")
      T("output control arguments:\n")
      T("  -d DIRECTORY   output [d]estination directory\n")
      T("  -c             write a [c]opy of input XML, not canonical XML\n")
      T("  -m             write [m]eta XML, not canonical XML\n")
      T("  -t             write no XML output for [t]iming of plain parsing\n")
      T("  -N             enable adding doctype and [n]otation declarations\n")
      T("\n")
      T("amplification attack protection (e.g. billion laughs):\n")
      T("  NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
      T("\n")
      T("  -a FACTOR      set maximum tolerated [a]mplification factor (default: 100.0)\n")
      T("  -b BYTES       set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n")
      T("\n")
      T("reparse deferral:\n")
      T("  -q             disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
      T("\n")
      T("info arguments:\n")
      T("  -h, --help     show this [h]elp message and exit\n")
      T("  -v, --version  show program's [v]ersion number and exit\n")
      T("\n")
      T("environment variables:\n")
      T("  EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)\n")
      T("                 Control verbosity of accounting debugging (default: 0)\n")
      T("  EXPAT_ENTITY_DEBUG=(0|1|2)\n")
      T("                 Control verbosity of entity debugging (default: 0)\n")
      T("  EXPAT_ENTROPY_DEBUG=(0|1)\n")
      T("                 Control verbosity of entropy debugging (default: 0)\n")
      T("  EXPAT_MALLOC_DEBUG=(0|1|2)\n")
      T("                 Control verbosity of allocation tracker (default: 0)\n")
      T("\n")
      T("exit status:\n")
      T("  0              the input files are well-formed and the output (if requested) was written successfully\n")
      T("  1              could not allocate data structures, signals a serious problem with execution environment\n")
      T("  2              one or more input files were not well-formed\n")
      T("  3              could not create an output file\n")
      T("  4              command-line argument error\n")
      T("\n")
      T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
      T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
      , /* clang-format on */
      prog, prog, prog);
  exit(rc); /* never returns to the caller */
}
979 
980 #if defined(__MINGW32__) && defined(XML_UNICODE)
981 /* Silence warning about missing prototype */
982 int wmain(int argc, XML_Char **argv);
983 #endif
984 
/* Fetches the value belonging to the option character at argv[i][j] and
   stores a pointer to it in constCharStarTarget:

   - "-dVALUE": the value is the remainder of the current argument.
   - "-d VALUE": the value is the next argument; if there is none,
     usage(..) is called with XMLWF_EXIT_USAGE_ERROR.

   Afterwards i refers to the argument following the value and j is reset
   to 0.  Note that argc, argv, i and j are evaluated more than once and
   that i and j must be modifiable lvalues.

   The do { .. } while (0) wrapper makes the expansion a single statement,
   so that "XMLWF_SHIFT_ARG_INTO(..);" is safe in unbraced if/else bodies. */
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j)            \
  do {                                                                         \
    if ((argv)[(i)][(j) + 1] == T('\0')) {                                     \
      /* Option letter ends the argument: value is the next argument */       \
      if (++(i) == (argc)) {                                                   \
        usage((argv)[0], XMLWF_EXIT_USAGE_ERROR);                              \
        /* usage called exit(..), never gets here */                           \
      }                                                                        \
      (constCharStarTarget) = (argv)[(i)];                                     \
    } else {                                                                   \
      /* Value is glued to the option letter */                                \
      (constCharStarTarget) = (argv)[(i)] + (j) + 1;                           \
    }                                                                          \
    (i)++;                                                                     \
    (j) = 0;                                                                   \
  } while (0)
999 
1000 int
1001 tmain(int argc, XML_Char **argv) {
1002   int i, j;
1003   const XML_Char *outputDir = NULL;
1004   const XML_Char *encoding = NULL;
1005   unsigned processFlags = XML_MAP_FILE;
1006   int windowsCodePages = 0;
1007   int outputType = 0;
1008   int useNamespaces = 0;
1009   int requireStandalone = 0;
1010   int requiresNotations = 0;
1011   int continueOnError = 0;
1012 
1013   float attackMaximumAmplification = -1.0f; /* signaling "not set" */
1014   unsigned long long attackThresholdBytes = 0;
1015   XML_Bool attackThresholdGiven = XML_FALSE;
1016 
1017   XML_Bool disableDeferral = XML_FALSE;
1018 
1019   int exitCode = XMLWF_EXIT_SUCCESS;
1020   enum XML_ParamEntityParsing paramEntityParsing
1021       = XML_PARAM_ENTITY_PARSING_NEVER;
1022   int useStdin = 0;
1023   XmlwfUserData userData = {NULL, NULL, NULL};
1024 
1025 #ifdef _MSC_VER
1026   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
1027 #endif
1028 
1029   i = 1;
1030   j = 0;
1031   while (i < argc) {
1032     if (j == 0) {
1033       if (argv[i][0] != T('-'))
1034         break;
1035       if (argv[i][1] == T('-')) {
1036         if (argv[i][2] == T('\0')) {
1037           i++;
1038           break;
1039         } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
1040           usage(argv[0], XMLWF_EXIT_SUCCESS);
1041           // usage called exit(..), never gets here
1042         } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
1043           showVersion(argv[0]);
1044           return XMLWF_EXIT_SUCCESS;
1045         }
1046       }
1047       j++;
1048     }
1049     switch (argv[i][j]) {
1050     case T('r'):
1051       processFlags &= ~XML_MAP_FILE;
1052       j++;
1053       break;
1054     case T('s'):
1055       requireStandalone = 1;
1056       j++;
1057       break;
1058     case T('n'):
1059       useNamespaces = 1;
1060       j++;
1061       break;
1062     case T('p'):
1063       paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
1064       EXPAT_FALLTHROUGH;
1065     case T('x'):
1066       processFlags |= XML_EXTERNAL_ENTITIES;
1067       j++;
1068       break;
1069     case T('w'):
1070       windowsCodePages = 1;
1071       j++;
1072       break;
1073     case T('m'):
1074       outputType = 'm';
1075       j++;
1076       break;
1077     case T('c'):
1078       outputType = 'c';
1079       useNamespaces = 0;
1080       j++;
1081       break;
1082     case T('t'):
1083       outputType = 't';
1084       j++;
1085       break;
1086     case T('N'):
1087       requiresNotations = 1;
1088       j++;
1089       break;
1090     case T('d'):
1091       XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
1092       break;
1093     case T('e'):
1094       XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
1095       break;
1096     case T('h'):
1097       usage(argv[0], XMLWF_EXIT_SUCCESS);
1098       // usage called exit(..), never gets here
1099     case T('v'):
1100       showVersion(argv[0]);
1101       return XMLWF_EXIT_SUCCESS;
1102     case T('g'): {
1103       const XML_Char *valueText = NULL;
1104       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1105 
1106       errno = 0;
1107       XML_Char *afterValueText = (XML_Char *)valueText;
1108       const long long read_size_bytes_candidate
1109           = tcstoull(valueText, &afterValueText, 10);
1110       if ((errno != 0) || (afterValueText[0] != T('\0'))
1111           || (read_size_bytes_candidate < 1)
1112           || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
1113         // This prevents tperror(..) from reporting misleading "[..]: Success"
1114         errno = ERANGE;
1115         tperror(T("invalid buffer size") T(
1116             " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
1117         exit(XMLWF_EXIT_USAGE_ERROR);
1118       }
1119       g_read_size_bytes = (int)read_size_bytes_candidate;
1120       break;
1121     }
1122     case T('k'):
1123       continueOnError = 1;
1124       j++;
1125       break;
1126     case T('a'): {
1127       const XML_Char *valueText = NULL;
1128       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1129 
1130       errno = 0;
1131       XML_Char *afterValueText = NULL;
1132       attackMaximumAmplification = tcstof(valueText, &afterValueText);
1133       if ((errno != 0) || (afterValueText[0] != T('\0'))
1134           || isnan(attackMaximumAmplification)
1135           || (attackMaximumAmplification < 1.0f)) {
1136         // This prevents tperror(..) from reporting misleading "[..]: Success"
1137         errno = ERANGE;
1138         tperror(T("invalid amplification limit") T(
1139             " (needs a floating point number greater or equal than 1.0)"));
1140         exit(XMLWF_EXIT_USAGE_ERROR);
1141       }
1142 #if XML_GE == 0
1143       ftprintf(stderr,
1144                T("Warning: Given amplification limit ignored")
1145                    T(", xmlwf has been compiled without DTD/GE support.\n"));
1146 #endif
1147       break;
1148     }
1149     case T('b'): {
1150       const XML_Char *valueText = NULL;
1151       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1152 
1153       errno = 0;
1154       XML_Char *afterValueText = (XML_Char *)valueText;
1155       attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
1156       if ((errno != 0) || (afterValueText[0] != T('\0'))) {
1157         // This prevents tperror(..) from reporting misleading "[..]: Success"
1158         errno = ERANGE;
1159         tperror(T("invalid ignore threshold")
1160                     T(" (needs an integer from 0 to 2^64-1)"));
1161         exit(XMLWF_EXIT_USAGE_ERROR);
1162       }
1163       attackThresholdGiven = XML_TRUE;
1164 #if XML_GE == 0
1165       ftprintf(stderr,
1166                T("Warning: Given attack threshold ignored")
1167                    T(", xmlwf has been compiled without DTD/GE support.\n"));
1168 #endif
1169       break;
1170     }
1171     case T('q'): {
1172       disableDeferral = XML_TRUE;
1173       j++;
1174       break;
1175     }
1176     case T('\0'):
1177       if (j > 1) {
1178         i++;
1179         j = 0;
1180         break;
1181       }
1182       EXPAT_FALLTHROUGH;
1183     default:
1184       usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
1185       // usage called exit(..), never gets here
1186     }
1187   }
1188   if (i == argc) {
1189     useStdin = 1;
1190     processFlags &= ~XML_MAP_FILE;
1191     i--;
1192   }
1193   for (; i < argc; i++) {
1194     XML_Char *outName = 0;
1195     int result;
1196     XML_Parser parser;
1197     if (useNamespaces)
1198       parser = XML_ParserCreateNS(encoding, NSSEP);
1199     else
1200       parser = XML_ParserCreate(encoding);
1201 
1202     if (! parser) {
1203       tperror(T("Could not instantiate parser"));
1204       exit(XMLWF_EXIT_INTERNAL_ERROR);
1205     }
1206 
1207     if (attackMaximumAmplification != -1.0f) {
1208 #if XML_GE == 1
1209       XML_SetBillionLaughsAttackProtectionMaximumAmplification(
1210           parser, attackMaximumAmplification);
1211       XML_SetAllocTrackerMaximumAmplification(parser,
1212                                               attackMaximumAmplification);
1213 #endif
1214     }
1215     if (attackThresholdGiven) {
1216 #if XML_GE == 1
1217       XML_SetBillionLaughsAttackProtectionActivationThreshold(
1218           parser, attackThresholdBytes);
1219       XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes);
1220 #else
1221       (void)attackThresholdBytes; // silence -Wunused-but-set-variable
1222 #endif
1223     }
1224 
1225     if (disableDeferral) {
1226       const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
1227       if (! success) {
1228         // This prevents tperror(..) from reporting misleading "[..]: Success"
1229         errno = EINVAL;
1230         tperror(T("Failed to disable reparse deferral"));
1231         exit(XMLWF_EXIT_INTERNAL_ERROR);
1232       }
1233     }
1234 
1235     if (requireStandalone)
1236       XML_SetNotStandaloneHandler(parser, notStandalone);
1237     XML_SetParamEntityParsing(parser, paramEntityParsing);
1238     if (outputType == 't') {
1239       /* This is for doing timings; this gives a more realistic estimate of
1240          the parsing time. */
1241       outputDir = 0;
1242       XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1243       XML_SetCharacterDataHandler(parser, nopCharacterData);
1244       XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1245     } else if (outputDir) {
1246       const XML_Char *delim = T("/");
1247       const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1248       if (! useStdin) {
1249         /* Jump after last (back)slash */
1250         const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1251         if (lastDelim)
1252           file = lastDelim + 1;
1253 #if defined(_WIN32)
1254         else {
1255           const XML_Char *winDelim = T("\\");
1256           lastDelim = tcsrchr(file, winDelim[0]);
1257           if (lastDelim) {
1258             file = lastDelim + 1;
1259             delim = winDelim;
1260           }
1261         }
1262 #endif
1263       }
1264       const size_t outputDirLen = tcslen(outputDir);
1265       const size_t fileLen = tcslen(file);
1266 
1267       /* Detect and prevent integer overflow in the addition (without
1268          risking underflow) and the multiplication, mirroring the guards
1269          in xcsdup() and resolveSystemId() */
1270       if (outputDirLen > SIZE_MAX - fileLen
1271           || outputDirLen > SIZE_MAX - fileLen - 2) {
1272         tperror(T("Could not allocate memory"));
1273         exit(XMLWF_EXIT_INTERNAL_ERROR);
1274       }
1275 
1276       const size_t charsRequired = outputDirLen + fileLen + 2;
1277 
1278       if (charsRequired > SIZE_MAX / sizeof(XML_Char)) {
1279         tperror(T("Could not allocate memory"));
1280         exit(XMLWF_EXIT_INTERNAL_ERROR);
1281       }
1282 
1283       outName = malloc(charsRequired * sizeof(XML_Char));
1284       if (! outName) {
1285         tperror(T("Could not allocate memory"));
1286         exit(XMLWF_EXIT_INTERNAL_ERROR);
1287       }
1288       tcscpy(outName, outputDir);
1289       tcscat(outName, delim);
1290       tcscat(outName, file);
1291       userData.fp = tfopen(outName, T("wb"));
1292       if (! userData.fp) {
1293         tperror(outName);
1294         exitCode = XMLWF_EXIT_OUTPUT_ERROR;
1295         free(outName);
1296         XML_ParserFree(parser);
1297         if (continueOnError) {
1298           continue;
1299         } else {
1300           break;
1301         }
1302       }
1303       setvbuf(userData.fp, NULL, _IOFBF, 16384);
1304 #ifdef XML_UNICODE
1305       puttc(0xFEFF, userData.fp);
1306 #endif
1307       XML_SetUserData(parser, &userData);
1308       switch (outputType) {
1309       case 'm':
1310         XML_UseParserAsHandlerArg(parser);
1311         XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1312         XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1313         XML_SetCommentHandler(parser, metaComment);
1314         XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1315                                    metaEndCdataSection);
1316         XML_SetCharacterDataHandler(parser, metaCharacterData);
1317         XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1318                                   metaEndDoctypeDecl);
1319         XML_SetEntityDeclHandler(parser, metaEntityDecl);
1320         XML_SetNotationDeclHandler(parser, metaNotationDecl);
1321         XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1322                                     metaEndNamespaceDecl);
1323         metaStartDocument(parser);
1324         break;
1325       case 'c':
1326         XML_UseParserAsHandlerArg(parser);
1327         XML_SetDefaultHandler(parser, markup);
1328         XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1329         XML_SetCharacterDataHandler(parser, defaultCharacterData);
1330         XML_SetProcessingInstructionHandler(parser,
1331                                             defaultProcessingInstruction);
1332         break;
1333       default:
1334         if (useNamespaces)
1335           XML_SetElementHandler(parser, startElementNS, endElementNS);
1336         else
1337           XML_SetElementHandler(parser, startElement, endElement);
1338         XML_SetCharacterDataHandler(parser, characterData);
1339 #ifndef W3C14N
1340         XML_SetProcessingInstructionHandler(parser, processingInstruction);
1341         if (requiresNotations) {
1342           XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1343           XML_SetNotationDeclHandler(parser, notationDecl);
1344         }
1345 #endif /* not W3C14N */
1346         break;
1347       }
1348     }
1349     if (windowsCodePages)
1350       XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1351     result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1352     if (outputDir) {
1353       if (outputType == 'm')
1354         metaEndDocument(parser);
1355       fclose(userData.fp);
1356       if (! result) {
1357         tremove(outName);
1358       }
1359       free(outName);
1360     }
1361     XML_ParserFree(parser);
1362     if (! result) {
1363       exitCode = XMLWF_EXIT_NOT_WELLFORMED;
1364       cleanupUserData(&userData);
1365       if (! continueOnError) {
1366         break;
1367       }
1368     }
1369   }
1370   return exitCode;
1371 }
1372