xref: /freebsd/contrib/expat/xmlwf/xmlwf.c (revision d4eeb02986980bf33dd56c41ceb9fc5f180c0d47)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14    Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Copyright (c) 2020      Joe Orton <jorton@redhat.com>
18    Copyright (c) 2020      Kleber Tarcísio <klebertarcisio@yahoo.com.br>
19    Copyright (c) 2021      Tim Bray <tbray@textuality.com>
20    Licensed under the MIT license:
21 
22    Permission is  hereby granted,  free of charge,  to any  person obtaining
23    a  copy  of  this  software   and  associated  documentation  files  (the
24    "Software"),  to  deal in  the  Software  without restriction,  including
25    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
26    distribute, sublicense, and/or sell copies of the Software, and to permit
27    persons  to whom  the Software  is  furnished to  do so,  subject to  the
28    following conditions:
29 
30    The above copyright  notice and this permission notice  shall be included
31    in all copies or substantial portions of the Software.
32 
33    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
34    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
35    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
36    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
37    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
38    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
39    USE OR OTHER DEALINGS IN THE SOFTWARE.
40 */
41 
42 #include <expat_config.h>
43 
44 #include <assert.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <stddef.h>
48 #include <string.h>
49 #include <math.h> /* for isnan */
50 #include <errno.h>
51 
52 #include "expat.h"
53 #include "codepage.h"
54 #include "internal.h" /* for UNUSED_P only */
55 #include "xmlfile.h"
56 #include "xmltchar.h"
57 
58 #ifdef _MSC_VER
59 #  include <crtdbg.h>
60 #endif
61 
62 #ifdef XML_UNICODE
63 #  include <wchar.h>
64 #endif
65 
/* Process exit statuses.  The numeric values are the ones listed in the
   "exit status" section of the help text printed by usage(). */
enum ExitCode {
  /* Input was well-formed and any requested output was written. */
  XMLWF_EXIT_SUCCESS = 0,
  /* Data structures could not be allocated. */
  XMLWF_EXIT_INTERNAL_ERROR = 1,
  /* One or more input files were not well-formed. */
  XMLWF_EXIT_NOT_WELLFORMED = 2,
  /* An output file could not be created. */
  XMLWF_EXIT_OUTPUT_ERROR = 3,
  /* The command line was invalid. */
  XMLWF_EXIT_USAGE_ERROR = 4
};
73 
74 /* Structures for handler user data */
75 typedef struct NotationList {
76   struct NotationList *next;
77   const XML_Char *notationName;
78   const XML_Char *systemId;
79   const XML_Char *publicId;
80 } NotationList;
81 
82 typedef struct xmlwfUserData {
83   FILE *fp;
84   NotationList *notationListHead;
85   const XML_Char *currentDoctypeName;
86 } XmlwfUserData;
87 
/* Character that separates the namespace URI from the local part in the
   names handled by the *NS handlers and nsattcmp() (they locate it with
   tcsrchr).  This ensures proper sorting. */

#define NSSEP T('\001')
91 
92 static void XMLCALL
93 characterData(void *userData, const XML_Char *s, int len) {
94   FILE *fp = ((XmlwfUserData *)userData)->fp;
95   for (; len > 0; --len, ++s) {
96     switch (*s) {
97     case T('&'):
98       fputts(T("&amp;"), fp);
99       break;
100     case T('<'):
101       fputts(T("&lt;"), fp);
102       break;
103     case T('>'):
104       fputts(T("&gt;"), fp);
105       break;
106 #ifdef W3C14N
107     case 13:
108       fputts(T("&#xD;"), fp);
109       break;
110 #else
111     case T('"'):
112       fputts(T("&quot;"), fp);
113       break;
114     case 9:
115     case 10:
116     case 13:
117       ftprintf(fp, T("&#%d;"), *s);
118       break;
119 #endif
120     default:
121       puttc(*s, fp);
122       break;
123     }
124   }
125 }
126 
127 static void
128 attributeValue(FILE *fp, const XML_Char *s) {
129   puttc(T('='), fp);
130   puttc(T('"'), fp);
131   assert(s);
132   for (;;) {
133     switch (*s) {
134     case 0:
135     case NSSEP:
136       puttc(T('"'), fp);
137       return;
138     case T('&'):
139       fputts(T("&amp;"), fp);
140       break;
141     case T('<'):
142       fputts(T("&lt;"), fp);
143       break;
144     case T('"'):
145       fputts(T("&quot;"), fp);
146       break;
147 #ifdef W3C14N
148     case 9:
149       fputts(T("&#x9;"), fp);
150       break;
151     case 10:
152       fputts(T("&#xA;"), fp);
153       break;
154     case 13:
155       fputts(T("&#xD;"), fp);
156       break;
157 #else
158     case T('>'):
159       fputts(T("&gt;"), fp);
160       break;
161     case 9:
162     case 10:
163     case 13:
164       ftprintf(fp, T("&#%d;"), *s);
165       break;
166 #endif
167     default:
168       puttc(*s, fp);
169       break;
170     }
171     s++;
172   }
173 }
174 
175 /* Lexicographically comparing UTF-8 encoded attribute values,
176 is equivalent to lexicographically comparing based on the character number. */
177 
178 static int
179 attcmp(const void *att1, const void *att2) {
180   return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
181 }
182 
183 static void XMLCALL
184 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
185   int nAtts;
186   const XML_Char **p;
187   FILE *fp = ((XmlwfUserData *)userData)->fp;
188   puttc(T('<'), fp);
189   fputts(name, fp);
190 
191   p = atts;
192   while (*p)
193     ++p;
194   nAtts = (int)((p - atts) >> 1);
195   if (nAtts > 1)
196     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
197   while (*atts) {
198     puttc(T(' '), fp);
199     fputts(*atts++, fp);
200     attributeValue(fp, *atts);
201     atts++;
202   }
203   puttc(T('>'), fp);
204 }
205 
206 static void XMLCALL
207 endElement(void *userData, const XML_Char *name) {
208   FILE *fp = ((XmlwfUserData *)userData)->fp;
209   puttc(T('<'), fp);
210   puttc(T('/'), fp);
211   fputts(name, fp);
212   puttc(T('>'), fp);
213 }
214 
215 static int
216 nsattcmp(const void *p1, const void *p2) {
217   const XML_Char *att1 = *(const XML_Char **)p1;
218   const XML_Char *att2 = *(const XML_Char **)p2;
219   int sep1 = (tcsrchr(att1, NSSEP) != 0);
220   int sep2 = (tcsrchr(att1, NSSEP) != 0);
221   if (sep1 != sep2)
222     return sep1 - sep2;
223   return tcscmp(att1, att2);
224 }
225 
226 static void XMLCALL
227 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
228   int nAtts;
229   int nsi;
230   const XML_Char **p;
231   FILE *fp = ((XmlwfUserData *)userData)->fp;
232   const XML_Char *sep;
233   puttc(T('<'), fp);
234 
235   sep = tcsrchr(name, NSSEP);
236   if (sep) {
237     fputts(T("n1:"), fp);
238     fputts(sep + 1, fp);
239     fputts(T(" xmlns:n1"), fp);
240     attributeValue(fp, name);
241     nsi = 2;
242   } else {
243     fputts(name, fp);
244     nsi = 1;
245   }
246 
247   p = atts;
248   while (*p)
249     ++p;
250   nAtts = (int)((p - atts) >> 1);
251   if (nAtts > 1)
252     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
253   while (*atts) {
254     name = *atts++;
255     sep = tcsrchr(name, NSSEP);
256     puttc(T(' '), fp);
257     if (sep) {
258       ftprintf(fp, T("n%d:"), nsi);
259       fputts(sep + 1, fp);
260     } else
261       fputts(name, fp);
262     attributeValue(fp, *atts);
263     if (sep) {
264       ftprintf(fp, T(" xmlns:n%d"), nsi++);
265       attributeValue(fp, name);
266     }
267     atts++;
268   }
269   puttc(T('>'), fp);
270 }
271 
272 static void XMLCALL
273 endElementNS(void *userData, const XML_Char *name) {
274   FILE *fp = ((XmlwfUserData *)userData)->fp;
275   const XML_Char *sep;
276   puttc(T('<'), fp);
277   puttc(T('/'), fp);
278   sep = tcsrchr(name, NSSEP);
279   if (sep) {
280     fputts(T("n1:"), fp);
281     fputts(sep + 1, fp);
282   } else
283     fputts(name, fp);
284   puttc(T('>'), fp);
285 }
286 
287 #ifndef W3C14N
288 
289 static void XMLCALL
290 processingInstruction(void *userData, const XML_Char *target,
291                       const XML_Char *data) {
292   FILE *fp = ((XmlwfUserData *)userData)->fp;
293   puttc(T('<'), fp);
294   puttc(T('?'), fp);
295   fputts(target, fp);
296   puttc(T(' '), fp);
297   fputts(data, fp);
298   puttc(T('?'), fp);
299   puttc(T('>'), fp);
300 }
301 
302 static XML_Char *
303 xcsdup(const XML_Char *s) {
304   XML_Char *result;
305   int count = 0;
306   int numBytes;
307 
308   /* Get the length of the string, including terminator */
309   while (s[count++] != 0) {
310     /* Do nothing */
311   }
312   numBytes = count * sizeof(XML_Char);
313   result = malloc(numBytes);
314   if (result == NULL)
315     return NULL;
316   memcpy(result, s, numBytes);
317   return result;
318 }
319 
320 static void XMLCALL
321 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
322                  const XML_Char *sysid, const XML_Char *publid,
323                  int has_internal_subset) {
324   XmlwfUserData *data = (XmlwfUserData *)userData;
325   UNUSED_P(sysid);
326   UNUSED_P(publid);
327   UNUSED_P(has_internal_subset);
328   data->currentDoctypeName = xcsdup(doctypeName);
329 }
330 
331 static void
332 freeNotations(XmlwfUserData *data) {
333   NotationList *notationListHead = data->notationListHead;
334 
335   while (notationListHead != NULL) {
336     NotationList *next = notationListHead->next;
337     free((void *)notationListHead->notationName);
338     free((void *)notationListHead->systemId);
339     free((void *)notationListHead->publicId);
340     free(notationListHead);
341     notationListHead = next;
342   }
343   data->notationListHead = NULL;
344 }
345 
346 static void
347 cleanupUserData(XmlwfUserData *userData) {
348   free((void *)userData->currentDoctypeName);
349   userData->currentDoctypeName = NULL;
350   freeNotations(userData);
351 }
352 
353 static int
354 xcscmp(const XML_Char *xs, const XML_Char *xt) {
355   while (*xs != 0 && *xt != 0) {
356     if (*xs < *xt)
357       return -1;
358     if (*xs > *xt)
359       return 1;
360     xs++;
361     xt++;
362   }
363   if (*xs < *xt)
364     return -1;
365   if (*xs > *xt)
366     return 1;
367   return 0;
368 }
369 
370 static int
371 notationCmp(const void *a, const void *b) {
372   const NotationList *const n1 = *(NotationList **)a;
373   const NotationList *const n2 = *(NotationList **)b;
374 
375   return xcscmp(n1->notationName, n2->notationName);
376 }
377 
378 static void XMLCALL
379 endDoctypeDecl(void *userData) {
380   XmlwfUserData *data = (XmlwfUserData *)userData;
381   NotationList **notations;
382   int notationCount = 0;
383   NotationList *p;
384   int i;
385 
386   /* How many notations do we have? */
387   for (p = data->notationListHead; p != NULL; p = p->next)
388     notationCount++;
389   if (notationCount == 0) {
390     /* Nothing to report */
391     free((void *)data->currentDoctypeName);
392     data->currentDoctypeName = NULL;
393     return;
394   }
395 
396   notations = malloc(notationCount * sizeof(NotationList *));
397   if (notations == NULL) {
398     fprintf(stderr, "Unable to sort notations");
399     freeNotations(data);
400     return;
401   }
402 
403   for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
404     notations[i] = p;
405   }
406   qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
407 
408   /* Output the DOCTYPE header */
409   fputts(T("<!DOCTYPE "), data->fp);
410   fputts(data->currentDoctypeName, data->fp);
411   fputts(T(" [\n"), data->fp);
412 
413   /* Now the NOTATIONs */
414   for (i = 0; i < notationCount; i++) {
415     fputts(T("<!NOTATION "), data->fp);
416     fputts(notations[i]->notationName, data->fp);
417     if (notations[i]->publicId != NULL) {
418       fputts(T(" PUBLIC '"), data->fp);
419       fputts(notations[i]->publicId, data->fp);
420       puttc(T('\''), data->fp);
421       if (notations[i]->systemId != NULL) {
422         puttc(T(' '), data->fp);
423         puttc(T('\''), data->fp);
424         fputts(notations[i]->systemId, data->fp);
425         puttc(T('\''), data->fp);
426       }
427     } else if (notations[i]->systemId != NULL) {
428       fputts(T(" SYSTEM '"), data->fp);
429       fputts(notations[i]->systemId, data->fp);
430       puttc(T('\''), data->fp);
431     }
432     puttc(T('>'), data->fp);
433     puttc(T('\n'), data->fp);
434   }
435 
436   /* Finally end the DOCTYPE */
437   fputts(T("]>\n"), data->fp);
438 
439   free(notations);
440   freeNotations(data);
441   free((void *)data->currentDoctypeName);
442   data->currentDoctypeName = NULL;
443 }
444 
445 static void XMLCALL
446 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
447              const XML_Char *systemId, const XML_Char *publicId) {
448   XmlwfUserData *data = (XmlwfUserData *)userData;
449   NotationList *entry = malloc(sizeof(NotationList));
450   const char *errorMessage = "Unable to store NOTATION for output\n";
451 
452   UNUSED_P(base);
453   if (entry == NULL) {
454     fputs(errorMessage, stderr);
455     return; /* Nothing we can really do about this */
456   }
457   entry->notationName = xcsdup(notationName);
458   if (entry->notationName == NULL) {
459     fputs(errorMessage, stderr);
460     free(entry);
461     return;
462   }
463   if (systemId != NULL) {
464     entry->systemId = xcsdup(systemId);
465     if (entry->systemId == NULL) {
466       fputs(errorMessage, stderr);
467       free((void *)entry->notationName);
468       free(entry);
469       return;
470     }
471   } else {
472     entry->systemId = NULL;
473   }
474   if (publicId != NULL) {
475     entry->publicId = xcsdup(publicId);
476     if (entry->publicId == NULL) {
477       fputs(errorMessage, stderr);
478       free((void *)entry->systemId); /* Safe if it's NULL */
479       free((void *)entry->notationName);
480       free(entry);
481       return;
482     }
483   } else {
484     entry->publicId = NULL;
485   }
486 
487   entry->next = data->notationListHead;
488   data->notationListHead = entry;
489 }
490 
491 #endif /* not W3C14N */
492 
493 static void XMLCALL
494 defaultCharacterData(void *userData, const XML_Char *s, int len) {
495   UNUSED_P(s);
496   UNUSED_P(len);
497   XML_DefaultCurrent((XML_Parser)userData);
498 }
499 
500 static void XMLCALL
501 defaultStartElement(void *userData, const XML_Char *name,
502                     const XML_Char **atts) {
503   UNUSED_P(name);
504   UNUSED_P(atts);
505   XML_DefaultCurrent((XML_Parser)userData);
506 }
507 
508 static void XMLCALL
509 defaultEndElement(void *userData, const XML_Char *name) {
510   UNUSED_P(name);
511   XML_DefaultCurrent((XML_Parser)userData);
512 }
513 
514 static void XMLCALL
515 defaultProcessingInstruction(void *userData, const XML_Char *target,
516                              const XML_Char *data) {
517   UNUSED_P(target);
518   UNUSED_P(data);
519   XML_DefaultCurrent((XML_Parser)userData);
520 }
521 
522 static void XMLCALL
523 nopCharacterData(void *userData, const XML_Char *s, int len) {
524   UNUSED_P(userData);
525   UNUSED_P(s);
526   UNUSED_P(len);
527 }
528 
529 static void XMLCALL
530 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
531   UNUSED_P(userData);
532   UNUSED_P(name);
533   UNUSED_P(atts);
534 }
535 
536 static void XMLCALL
537 nopEndElement(void *userData, const XML_Char *name) {
538   UNUSED_P(userData);
539   UNUSED_P(name);
540 }
541 
542 static void XMLCALL
543 nopProcessingInstruction(void *userData, const XML_Char *target,
544                          const XML_Char *data) {
545   UNUSED_P(userData);
546   UNUSED_P(target);
547   UNUSED_P(data);
548 }
549 
550 static void XMLCALL
551 markup(void *userData, const XML_Char *s, int len) {
552   FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
553   for (; len > 0; --len, ++s)
554     puttc(*s, fp);
555 }
556 
557 static void
558 metaLocation(XML_Parser parser) {
559   const XML_Char *uri = XML_GetBase(parser);
560   FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
561   if (uri)
562     ftprintf(fp, T(" uri=\"%s\""), uri);
563   ftprintf(fp,
564            T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
565                T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
566                    T(XML_FMT_INT_MOD) T("u\""),
567            XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
568            XML_GetCurrentLineNumber(parser),
569            XML_GetCurrentColumnNumber(parser));
570 }
571 
572 static void
573 metaStartDocument(void *userData) {
574   fputts(T("<document>\n"),
575          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
576 }
577 
578 static void
579 metaEndDocument(void *userData) {
580   fputts(T("</document>\n"),
581          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
582 }
583 
584 static void XMLCALL
585 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
586   XML_Parser parser = (XML_Parser)userData;
587   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
588   FILE *fp = data->fp;
589   const XML_Char **specifiedAttsEnd
590       = atts + XML_GetSpecifiedAttributeCount(parser);
591   const XML_Char **idAttPtr;
592   int idAttIndex = XML_GetIdAttributeIndex(parser);
593   if (idAttIndex < 0)
594     idAttPtr = 0;
595   else
596     idAttPtr = atts + idAttIndex;
597 
598   ftprintf(fp, T("<starttag name=\"%s\""), name);
599   metaLocation(parser);
600   if (*atts) {
601     fputts(T(">\n"), fp);
602     do {
603       ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
604       characterData(data, atts[1], (int)tcslen(atts[1]));
605       if (atts >= specifiedAttsEnd)
606         fputts(T("\" defaulted=\"yes\"/>\n"), fp);
607       else if (atts == idAttPtr)
608         fputts(T("\" id=\"yes\"/>\n"), fp);
609       else
610         fputts(T("\"/>\n"), fp);
611     } while (*(atts += 2));
612     fputts(T("</starttag>\n"), fp);
613   } else
614     fputts(T("/>\n"), fp);
615 }
616 
617 static void XMLCALL
618 metaEndElement(void *userData, const XML_Char *name) {
619   XML_Parser parser = (XML_Parser)userData;
620   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
621   FILE *fp = data->fp;
622   ftprintf(fp, T("<endtag name=\"%s\""), name);
623   metaLocation(parser);
624   fputts(T("/>\n"), fp);
625 }
626 
627 static void XMLCALL
628 metaProcessingInstruction(void *userData, const XML_Char *target,
629                           const XML_Char *data) {
630   XML_Parser parser = (XML_Parser)userData;
631   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
632   FILE *fp = usrData->fp;
633   ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
634   characterData(usrData, data, (int)tcslen(data));
635   puttc(T('"'), fp);
636   metaLocation(parser);
637   fputts(T("/>\n"), fp);
638 }
639 
640 static void XMLCALL
641 metaComment(void *userData, const XML_Char *data) {
642   XML_Parser parser = (XML_Parser)userData;
643   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
644   FILE *fp = usrData->fp;
645   fputts(T("<comment data=\""), fp);
646   characterData(usrData, data, (int)tcslen(data));
647   puttc(T('"'), fp);
648   metaLocation(parser);
649   fputts(T("/>\n"), fp);
650 }
651 
652 static void XMLCALL
653 metaStartCdataSection(void *userData) {
654   XML_Parser parser = (XML_Parser)userData;
655   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
656   FILE *fp = data->fp;
657   fputts(T("<startcdata"), fp);
658   metaLocation(parser);
659   fputts(T("/>\n"), fp);
660 }
661 
662 static void XMLCALL
663 metaEndCdataSection(void *userData) {
664   XML_Parser parser = (XML_Parser)userData;
665   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
666   FILE *fp = data->fp;
667   fputts(T("<endcdata"), fp);
668   metaLocation(parser);
669   fputts(T("/>\n"), fp);
670 }
671 
672 static void XMLCALL
673 metaCharacterData(void *userData, const XML_Char *s, int len) {
674   XML_Parser parser = (XML_Parser)userData;
675   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
676   FILE *fp = data->fp;
677   fputts(T("<chars str=\""), fp);
678   characterData(data, s, len);
679   puttc(T('"'), fp);
680   metaLocation(parser);
681   fputts(T("/>\n"), fp);
682 }
683 
684 static void XMLCALL
685 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
686                      const XML_Char *sysid, const XML_Char *pubid,
687                      int has_internal_subset) {
688   XML_Parser parser = (XML_Parser)userData;
689   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
690   FILE *fp = data->fp;
691   UNUSED_P(sysid);
692   UNUSED_P(pubid);
693   UNUSED_P(has_internal_subset);
694   ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
695   metaLocation(parser);
696   fputts(T("/>\n"), fp);
697 }
698 
699 static void XMLCALL
700 metaEndDoctypeDecl(void *userData) {
701   XML_Parser parser = (XML_Parser)userData;
702   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
703   FILE *fp = data->fp;
704   fputts(T("<enddoctype"), fp);
705   metaLocation(parser);
706   fputts(T("/>\n"), fp);
707 }
708 
709 static void XMLCALL
710 metaNotationDecl(void *userData, const XML_Char *notationName,
711                  const XML_Char *base, const XML_Char *systemId,
712                  const XML_Char *publicId) {
713   XML_Parser parser = (XML_Parser)userData;
714   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
715   FILE *fp = data->fp;
716   UNUSED_P(base);
717   ftprintf(fp, T("<notation name=\"%s\""), notationName);
718   if (publicId)
719     ftprintf(fp, T(" public=\"%s\""), publicId);
720   if (systemId) {
721     fputts(T(" system=\""), fp);
722     characterData(data, systemId, (int)tcslen(systemId));
723     puttc(T('"'), fp);
724   }
725   metaLocation(parser);
726   fputts(T("/>\n"), fp);
727 }
728 
729 static void XMLCALL
730 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
731                const XML_Char *value, int value_length, const XML_Char *base,
732                const XML_Char *systemId, const XML_Char *publicId,
733                const XML_Char *notationName) {
734   XML_Parser parser = (XML_Parser)userData;
735   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
736   FILE *fp = data->fp;
737 
738   UNUSED_P(is_param);
739   UNUSED_P(base);
740   if (value) {
741     ftprintf(fp, T("<entity name=\"%s\""), entityName);
742     metaLocation(parser);
743     puttc(T('>'), fp);
744     characterData(data, value, value_length);
745     fputts(T("</entity/>\n"), fp);
746   } else if (notationName) {
747     ftprintf(fp, T("<entity name=\"%s\""), entityName);
748     if (publicId)
749       ftprintf(fp, T(" public=\"%s\""), publicId);
750     fputts(T(" system=\""), fp);
751     characterData(data, systemId, (int)tcslen(systemId));
752     puttc(T('"'), fp);
753     ftprintf(fp, T(" notation=\"%s\""), notationName);
754     metaLocation(parser);
755     fputts(T("/>\n"), fp);
756   } else {
757     ftprintf(fp, T("<entity name=\"%s\""), entityName);
758     if (publicId)
759       ftprintf(fp, T(" public=\"%s\""), publicId);
760     fputts(T(" system=\""), fp);
761     characterData(data, systemId, (int)tcslen(systemId));
762     puttc(T('"'), fp);
763     metaLocation(parser);
764     fputts(T("/>\n"), fp);
765   }
766 }
767 
768 static void XMLCALL
769 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
770                        const XML_Char *uri) {
771   XML_Parser parser = (XML_Parser)userData;
772   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
773   FILE *fp = data->fp;
774   fputts(T("<startns"), fp);
775   if (prefix)
776     ftprintf(fp, T(" prefix=\"%s\""), prefix);
777   if (uri) {
778     fputts(T(" ns=\""), fp);
779     characterData(data, uri, (int)tcslen(uri));
780     fputts(T("\"/>\n"), fp);
781   } else
782     fputts(T("/>\n"), fp);
783 }
784 
785 static void XMLCALL
786 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
787   XML_Parser parser = (XML_Parser)userData;
788   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
789   FILE *fp = data->fp;
790   if (! prefix)
791     fputts(T("<endns/>\n"), fp);
792   else
793     ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
794 }
795 
796 static int XMLCALL
797 unknownEncodingConvert(void *data, const char *p) {
798   return codepageConvert(*(int *)data, p);
799 }
800 
801 static int XMLCALL
802 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
803   int cp;
804   static const XML_Char prefixL[] = T("windows-");
805   static const XML_Char prefixU[] = T("WINDOWS-");
806   int i;
807 
808   UNUSED_P(userData);
809   for (i = 0; prefixU[i]; i++)
810     if (name[i] != prefixU[i] && name[i] != prefixL[i])
811       return 0;
812 
813   cp = 0;
814   for (; name[i]; i++) {
815     static const XML_Char digits[] = T("0123456789");
816     const XML_Char *s = tcschr(digits, name[i]);
817     if (! s)
818       return 0;
819     cp *= 10;
820     cp += (int)(s - digits);
821     if (cp >= 0x10000)
822       return 0;
823   }
824   if (! codepageMap(cp, info->map))
825     return 0;
826   info->convert = unknownEncodingConvert;
827   /* We could just cast the code page integer to a void *,
828   and avoid the use of release. */
829   info->release = free;
830   info->data = malloc(sizeof(int));
831   if (! info->data)
832     return 0;
833   *(int *)info->data = cp;
834   return 1;
835 }
836 
837 static int XMLCALL
838 notStandalone(void *userData) {
839   UNUSED_P(userData);
840   return 0;
841 }
842 
843 static void
844 showVersion(XML_Char *prog) {
845   XML_Char *s = prog;
846   XML_Char ch;
847   const XML_Feature *features = XML_GetFeatureList();
848   while ((ch = *s) != 0) {
849     if (ch == '/'
850 #if defined(_WIN32)
851         || ch == '\\'
852 #endif
853     )
854       prog = s + 1;
855     ++s;
856   }
857   ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
858   if (features != NULL && features[0].feature != XML_FEATURE_END) {
859     int i = 1;
860     ftprintf(stdout, T("%s"), features[0].name);
861     if (features[0].value)
862       ftprintf(stdout, T("=%ld"), features[0].value);
863     while (features[i].feature != XML_FEATURE_END) {
864       ftprintf(stdout, T(", %s"), features[i].name);
865       if (features[i].value)
866         ftprintf(stdout, T("=%ld"), features[i].value);
867       ++i;
868     }
869     ftprintf(stdout, T("\n"));
870   }
871 }
872 
/* Prints the help text to stderr, with prog substituted for the three
   program-name placeholders in the "usage:" lines, then terminates the
   process by calling exit(rc).  This function does not return. */
static void
usage(const XML_Char *prog, int rc) {
  ftprintf(
      stderr,
      /* Generated with:
       * $ xmlwf/xmlwf_helpgen.sh
       * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
       * xmlwf/xmlwf_helpgen.sh in here.
       */
      /* clang-format off */
      T("usage:\n")
      T("  %s [OPTIONS] [FILE ...]\n")
      T("  %s -h\n")
      T("  %s -v\n")
      T("\n")
      T("xmlwf - Determines if an XML document is well-formed\n")
      T("\n")
      T("positional arguments:\n")
      T("  FILE          file to process (default: STDIN)\n")
      T("\n")
      T("input control arguments:\n")
      T("  -s            print an error if the document is not [s]tandalone\n")
      T("  -n            enable [n]amespace processing\n")
      T("  -p            enable processing external DTDs and [p]arameter entities\n")
      T("  -x            enable processing of e[x]ternal entities\n")
      T("  -e ENCODING   override any in-document [e]ncoding declaration\n")
      T("  -w            enable support for [W]indows code pages\n")
      T("  -r            disable memory-mapping and use normal file [r]ead IO calls instead\n")
      T("  -k            when processing multiple files, [k]eep processing after first file with error\n")
      T("\n")
      T("output control arguments:\n")
      T("  -d DIRECTORY  output [d]estination directory\n")
      T("  -c            write a [c]opy of input XML, not canonical XML\n")
      T("  -m            write [m]eta XML, not canonical XML\n")
      T("  -t            write no XML output for [t]iming of plain parsing\n")
      T("  -N            enable adding doctype and [n]otation declarations\n")
      T("\n")
      T("billion laughs attack protection:\n")
      T("  NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
      T("\n")
      T("  -a FACTOR     set maximum tolerated [a]mplification factor (default: 100.0)\n")
      T("  -b BYTES      set number of output [b]ytes needed to activate (default: 8 MiB)\n")
      T("\n")
      T("info arguments:\n")
      T("  -h            show this [h]elp message and exit\n")
      T("  -v            show program's [v]ersion number and exit\n")
      T("\n")
      T("exit status:\n")
      T("  0             the input files are well-formed and the output (if requested) was written successfully\n")
      T("  1             could not allocate data structures, signals a serious problem with execution environment\n")
      T("  2             one or more input files were not well-formed\n")
      T("  3             could not create an output file\n")
      T("  4             command-line argument error\n")
      T("\n")
      T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
      T("Please report bugs at https://github.com/libexpat/libexpat/issues.  Thank you!\n")
      , /* clang-format on */
      prog, prog, prog);
  exit(rc);
}
933 
934 #if defined(__MINGW32__) && defined(XML_UNICODE)
935 /* Silence warning about missing prototype */
936 int wmain(int argc, XML_Char **argv);
937 #endif
938 
/* Fetches the argument of the option character at argv[i][j] into
   constCharStarTarget.  If text follows the option character in the same
   argv element ("-dDIR"), the argument is that text; otherwise it is the
   next argv element ("-d DIR"), and usage() is called with
   XMLWF_EXIT_USAGE_ERROR when there is none.  Afterwards i is advanced past
   the consumed element and j is reset to 0.

   i and j must be modifiable lvalues.  The body is wrapped in
   do { } while (0) so that an invocation followed by ';' forms exactly one
   statement, including inside an unbraced if/else. */
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j)            \
  do {                                                                         \
    if ((argv)[i][(j) + 1] == T('\0')) {                                       \
      if (++(i) == (argc))                                                     \
        usage((argv)[0], XMLWF_EXIT_USAGE_ERROR);                              \
      (constCharStarTarget) = (argv)[i];                                       \
    } else {                                                                   \
      (constCharStarTarget) = (argv)[i] + (j) + 1;                             \
    }                                                                          \
    (i)++;                                                                     \
    (j) = 0;                                                                   \
  } while (0)
951 
952 int
953 tmain(int argc, XML_Char **argv) {
954   int i, j;
955   const XML_Char *outputDir = NULL;
956   const XML_Char *encoding = NULL;
957   unsigned processFlags = XML_MAP_FILE;
958   int windowsCodePages = 0;
959   int outputType = 0;
960   int useNamespaces = 0;
961   int requireStandalone = 0;
962   int requiresNotations = 0;
963   int continueOnError = 0;
964 
965   float attackMaximumAmplification = -1.0f; /* signaling "not set" */
966   unsigned long long attackThresholdBytes;
967   XML_Bool attackThresholdGiven = XML_FALSE;
968 
969   int exitCode = XMLWF_EXIT_SUCCESS;
970   enum XML_ParamEntityParsing paramEntityParsing
971       = XML_PARAM_ENTITY_PARSING_NEVER;
972   int useStdin = 0;
973   XmlwfUserData userData = {NULL, NULL, NULL};
974 
975 #ifdef _MSC_VER
976   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
977 #endif
978 
979   i = 1;
980   j = 0;
981   while (i < argc) {
982     if (j == 0) {
983       if (argv[i][0] != T('-'))
984         break;
985       if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
986         i++;
987         break;
988       }
989       j++;
990     }
991     switch (argv[i][j]) {
992     case T('r'):
993       processFlags &= ~XML_MAP_FILE;
994       j++;
995       break;
996     case T('s'):
997       requireStandalone = 1;
998       j++;
999       break;
1000     case T('n'):
1001       useNamespaces = 1;
1002       j++;
1003       break;
1004     case T('p'):
1005       paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
1006       /* fall through */
1007     case T('x'):
1008       processFlags |= XML_EXTERNAL_ENTITIES;
1009       j++;
1010       break;
1011     case T('w'):
1012       windowsCodePages = 1;
1013       j++;
1014       break;
1015     case T('m'):
1016       outputType = 'm';
1017       j++;
1018       break;
1019     case T('c'):
1020       outputType = 'c';
1021       useNamespaces = 0;
1022       j++;
1023       break;
1024     case T('t'):
1025       outputType = 't';
1026       j++;
1027       break;
1028     case T('N'):
1029       requiresNotations = 1;
1030       j++;
1031       break;
1032     case T('d'):
1033       XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
1034       break;
1035     case T('e'):
1036       XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
1037       break;
1038     case T('h'):
1039       usage(argv[0], XMLWF_EXIT_SUCCESS);
1040       return 0;
1041     case T('v'):
1042       showVersion(argv[0]);
1043       return 0;
1044     case T('k'):
1045       continueOnError = 1;
1046       j++;
1047       break;
1048     case T('a'): {
1049       const XML_Char *valueText = NULL;
1050       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1051 
1052       errno = 0;
1053       XML_Char *afterValueText = (XML_Char *)valueText;
1054       attackMaximumAmplification = tcstof(valueText, &afterValueText);
1055       if ((errno != 0) || (afterValueText[0] != T('\0'))
1056           || isnan(attackMaximumAmplification)
1057           || (attackMaximumAmplification < 1.0f)) {
1058         // This prevents tperror(..) from reporting misleading "[..]: Success"
1059         errno = ERANGE;
1060         tperror(T("invalid amplification limit") T(
1061             " (needs a floating point number greater or equal than 1.0)"));
1062         exit(XMLWF_EXIT_USAGE_ERROR);
1063       }
1064 #ifndef XML_DTD
1065       ftprintf(stderr, T("Warning: Given amplification limit ignored") T(
1066                            ", xmlwf has been compiled without DTD support.\n"));
1067 #endif
1068       break;
1069     }
1070     case T('b'): {
1071       const XML_Char *valueText = NULL;
1072       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1073 
1074       errno = 0;
1075       XML_Char *afterValueText = (XML_Char *)valueText;
1076       attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
1077       if ((errno != 0) || (afterValueText[0] != T('\0'))) {
1078         // This prevents tperror(..) from reporting misleading "[..]: Success"
1079         errno = ERANGE;
1080         tperror(T("invalid ignore threshold")
1081                     T(" (needs an integer from 0 to 2^64-1)"));
1082         exit(XMLWF_EXIT_USAGE_ERROR);
1083       }
1084       attackThresholdGiven = XML_TRUE;
1085 #ifndef XML_DTD
1086       ftprintf(stderr, T("Warning: Given attack threshold ignored") T(
1087                            ", xmlwf has been compiled without DTD support.\n"));
1088 #endif
1089       break;
1090     }
1091     case T('\0'):
1092       if (j > 1) {
1093         i++;
1094         j = 0;
1095         break;
1096       }
1097       /* fall through */
1098     default:
1099       usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
1100     }
1101   }
1102   if (i == argc) {
1103     useStdin = 1;
1104     processFlags &= ~XML_MAP_FILE;
1105     i--;
1106   }
1107   for (; i < argc; i++) {
1108     XML_Char *outName = 0;
1109     int result;
1110     XML_Parser parser;
1111     if (useNamespaces)
1112       parser = XML_ParserCreateNS(encoding, NSSEP);
1113     else
1114       parser = XML_ParserCreate(encoding);
1115 
1116     if (! parser) {
1117       tperror(T("Could not instantiate parser"));
1118       exit(XMLWF_EXIT_INTERNAL_ERROR);
1119     }
1120 
1121     if (attackMaximumAmplification != -1.0f) {
1122 #ifdef XML_DTD
1123       XML_SetBillionLaughsAttackProtectionMaximumAmplification(
1124           parser, attackMaximumAmplification);
1125 #endif
1126     }
1127     if (attackThresholdGiven) {
1128 #ifdef XML_DTD
1129       XML_SetBillionLaughsAttackProtectionActivationThreshold(
1130           parser, attackThresholdBytes);
1131 #else
1132       (void)attackThresholdBytes; // silence -Wunused-but-set-variable
1133 #endif
1134     }
1135 
1136     if (requireStandalone)
1137       XML_SetNotStandaloneHandler(parser, notStandalone);
1138     XML_SetParamEntityParsing(parser, paramEntityParsing);
1139     if (outputType == 't') {
1140       /* This is for doing timings; this gives a more realistic estimate of
1141          the parsing time. */
1142       outputDir = 0;
1143       XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1144       XML_SetCharacterDataHandler(parser, nopCharacterData);
1145       XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1146     } else if (outputDir) {
1147       const XML_Char *delim = T("/");
1148       const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1149       if (! useStdin) {
1150         /* Jump after last (back)slash */
1151         const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1152         if (lastDelim)
1153           file = lastDelim + 1;
1154 #if defined(_WIN32)
1155         else {
1156           const XML_Char *winDelim = T("\\");
1157           lastDelim = tcsrchr(file, winDelim[0]);
1158           if (lastDelim) {
1159             file = lastDelim + 1;
1160             delim = winDelim;
1161           }
1162         }
1163 #endif
1164       }
1165       outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
1166                                    * sizeof(XML_Char));
1167       if (! outName) {
1168         tperror(T("Could not allocate memory"));
1169         exit(XMLWF_EXIT_INTERNAL_ERROR);
1170       }
1171       tcscpy(outName, outputDir);
1172       tcscat(outName, delim);
1173       tcscat(outName, file);
1174       userData.fp = tfopen(outName, T("wb"));
1175       if (! userData.fp) {
1176         tperror(outName);
1177         exitCode = XMLWF_EXIT_OUTPUT_ERROR;
1178         free(outName);
1179         XML_ParserFree(parser);
1180         if (continueOnError) {
1181           continue;
1182         } else {
1183           break;
1184         }
1185       }
1186       setvbuf(userData.fp, NULL, _IOFBF, 16384);
1187 #ifdef XML_UNICODE
1188       puttc(0xFEFF, userData.fp);
1189 #endif
1190       XML_SetUserData(parser, &userData);
1191       switch (outputType) {
1192       case 'm':
1193         XML_UseParserAsHandlerArg(parser);
1194         XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1195         XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1196         XML_SetCommentHandler(parser, metaComment);
1197         XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1198                                    metaEndCdataSection);
1199         XML_SetCharacterDataHandler(parser, metaCharacterData);
1200         XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1201                                   metaEndDoctypeDecl);
1202         XML_SetEntityDeclHandler(parser, metaEntityDecl);
1203         XML_SetNotationDeclHandler(parser, metaNotationDecl);
1204         XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1205                                     metaEndNamespaceDecl);
1206         metaStartDocument(parser);
1207         break;
1208       case 'c':
1209         XML_UseParserAsHandlerArg(parser);
1210         XML_SetDefaultHandler(parser, markup);
1211         XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1212         XML_SetCharacterDataHandler(parser, defaultCharacterData);
1213         XML_SetProcessingInstructionHandler(parser,
1214                                             defaultProcessingInstruction);
1215         break;
1216       default:
1217         if (useNamespaces)
1218           XML_SetElementHandler(parser, startElementNS, endElementNS);
1219         else
1220           XML_SetElementHandler(parser, startElement, endElement);
1221         XML_SetCharacterDataHandler(parser, characterData);
1222 #ifndef W3C14N
1223         XML_SetProcessingInstructionHandler(parser, processingInstruction);
1224         if (requiresNotations) {
1225           XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1226           XML_SetNotationDeclHandler(parser, notationDecl);
1227         }
1228 #endif /* not W3C14N */
1229         break;
1230       }
1231     }
1232     if (windowsCodePages)
1233       XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1234     result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1235     if (outputDir) {
1236       if (outputType == 'm')
1237         metaEndDocument(parser);
1238       fclose(userData.fp);
1239       if (! result) {
1240         tremove(outName);
1241       }
1242       free(outName);
1243     }
1244     XML_ParserFree(parser);
1245     if (! result) {
1246       exitCode = XMLWF_EXIT_NOT_WELLFORMED;
1247       cleanupUserData(&userData);
1248       if (! continueOnError) {
1249         break;
1250       }
1251     }
1252   }
1253   return exitCode;
1254 }
1255