1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
17 Copyright (c) 2020 Joe Orton <jorton@redhat.com>
18 Copyright (c) 2020 Kleber Tarcísio <klebertarcisio@yahoo.com.br>
19 Copyright (c) 2021 Tim Bray <tbray@textuality.com>
20 Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com>
21 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
22 Licensed under the MIT license:
23
24 Permission is hereby granted, free of charge, to any person obtaining
25 a copy of this software and associated documentation files (the
26 "Software"), to deal in the Software without restriction, including
27 without limitation the rights to use, copy, modify, merge, publish,
28 distribute, sublicense, and/or sell copies of the Software, and to permit
29 persons to whom the Software is furnished to do so, subject to the
30 following conditions:
31
32 The above copyright notice and this permission notice shall be included
33 in all copies or substantial portions of the Software.
34
35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41 USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43
44 #include "expat_config.h"
45
46 #include <assert.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <stddef.h>
50 #include <string.h>
51 #include <math.h> /* for isnan */
52 #include <errno.h>
53
54 #include "expat.h"
55 #include "codepage.h"
56 #include "internal.h" /* for UNUSED_P only */
57 #include "xmlfile.h"
58 #include "xmltchar.h"
59
60 #ifdef _MSC_VER
61 # include <crtdbg.h>
62 #endif
63
64 #ifdef XML_UNICODE
65 # include <wchar.h>
66 #endif
67
/* Process exit statuses of xmlwf.  The numeric values match the
   "exit status" table printed by usage(). */
enum ExitCode {
  XMLWF_EXIT_SUCCESS = 0,        /* input well-formed, output written */
  XMLWF_EXIT_INTERNAL_ERROR = 1, /* could not allocate data structures */
  XMLWF_EXIT_NOT_WELLFORMED = 2, /* one or more inputs not well-formed */
  XMLWF_EXIT_OUTPUT_ERROR = 3,   /* could not create an output file */
  XMLWF_EXIT_USAGE_ERROR = 4     /* command-line argument error */
};
75
76 /* Structures for handler user data */
77 typedef struct NotationList {
78 struct NotationList *next;
79 const XML_Char *notationName;
80 const XML_Char *systemId;
81 const XML_Char *publicId;
82 } NotationList;
83
84 typedef struct xmlwfUserData {
85 FILE *fp;
86 NotationList *notationListHead;
87 const XML_Char *currentDoctypeName;
88 } XmlwfUserData;
89
/* Separator the namespace-aware handlers below search for (with tcsrchr())
   to split an expanded name into its namespace part and its local part.
   Choosing '\001' ensures proper sorting -- presumably because it compares
   lower than any character that can occur in a name (TODO confirm). */

#define NSSEP (T('\001'))
93
94 static void XMLCALL
characterData(void * userData,const XML_Char * s,int len)95 characterData(void *userData, const XML_Char *s, int len) {
96 FILE *fp = ((XmlwfUserData *)userData)->fp;
97 for (; len > 0; --len, ++s) {
98 switch (*s) {
99 case T('&'):
100 fputts(T("&"), fp);
101 break;
102 case T('<'):
103 fputts(T("<"), fp);
104 break;
105 case T('>'):
106 fputts(T(">"), fp);
107 break;
108 #ifdef W3C14N
109 case 13:
110 fputts(T("
"), fp);
111 break;
112 #else
113 case T('"'):
114 fputts(T("""), fp);
115 break;
116 case 9:
117 case 10:
118 case 13:
119 ftprintf(fp, T("&#%d;"), *s);
120 break;
121 #endif
122 default:
123 puttc(*s, fp);
124 break;
125 }
126 }
127 }
128
129 static void
attributeValue(FILE * fp,const XML_Char * s)130 attributeValue(FILE *fp, const XML_Char *s) {
131 puttc(T('='), fp);
132 puttc(T('"'), fp);
133 assert(s);
134 for (;;) {
135 switch (*s) {
136 case 0:
137 case NSSEP:
138 puttc(T('"'), fp);
139 return;
140 case T('&'):
141 fputts(T("&"), fp);
142 break;
143 case T('<'):
144 fputts(T("<"), fp);
145 break;
146 case T('"'):
147 fputts(T("""), fp);
148 break;
149 #ifdef W3C14N
150 case 9:
151 fputts(T("	"), fp);
152 break;
153 case 10:
154 fputts(T("
"), fp);
155 break;
156 case 13:
157 fputts(T("
"), fp);
158 break;
159 #else
160 case T('>'):
161 fputts(T(">"), fp);
162 break;
163 case 9:
164 case 10:
165 case 13:
166 ftprintf(fp, T("&#%d;"), *s);
167 break;
168 #endif
169 default:
170 puttc(*s, fp);
171 break;
172 }
173 s++;
174 }
175 }
176
177 /* Lexicographically comparing UTF-8 encoded attribute values,
178 is equivalent to lexicographically comparing based on the character number. */
179
180 static int
attcmp(const void * att1,const void * att2)181 attcmp(const void *att1, const void *att2) {
182 return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
183 }
184
185 static void XMLCALL
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)186 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
187 int nAtts;
188 const XML_Char **p;
189 FILE *fp = ((XmlwfUserData *)userData)->fp;
190 puttc(T('<'), fp);
191 fputts(name, fp);
192
193 p = atts;
194 while (*p)
195 ++p;
196 nAtts = (int)((p - atts) >> 1);
197 if (nAtts > 1)
198 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
199 while (*atts) {
200 puttc(T(' '), fp);
201 fputts(*atts++, fp);
202 attributeValue(fp, *atts);
203 atts++;
204 }
205 puttc(T('>'), fp);
206 }
207
208 static void XMLCALL
endElement(void * userData,const XML_Char * name)209 endElement(void *userData, const XML_Char *name) {
210 FILE *fp = ((XmlwfUserData *)userData)->fp;
211 puttc(T('<'), fp);
212 puttc(T('/'), fp);
213 fputts(name, fp);
214 puttc(T('>'), fp);
215 }
216
217 static int
nsattcmp(const void * p1,const void * p2)218 nsattcmp(const void *p1, const void *p2) {
219 const XML_Char *att1 = *(const XML_Char *const *)p1;
220 const XML_Char *att2 = *(const XML_Char *const *)p2;
221 int sep1 = (tcsrchr(att1, NSSEP) != 0);
222 int sep2 = (tcsrchr(att2, NSSEP) != 0);
223 if (sep1 != sep2)
224 return sep1 - sep2;
225 return tcscmp(att1, att2);
226 }
227
228 static void XMLCALL
startElementNS(void * userData,const XML_Char * name,const XML_Char ** atts)229 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
230 int nAtts;
231 int nsi;
232 const XML_Char **p;
233 FILE *fp = ((XmlwfUserData *)userData)->fp;
234 const XML_Char *sep;
235 puttc(T('<'), fp);
236
237 sep = tcsrchr(name, NSSEP);
238 if (sep) {
239 fputts(T("n1:"), fp);
240 fputts(sep + 1, fp);
241 fputts(T(" xmlns:n1"), fp);
242 attributeValue(fp, name);
243 nsi = 2;
244 } else {
245 fputts(name, fp);
246 nsi = 1;
247 }
248
249 p = atts;
250 while (*p)
251 ++p;
252 nAtts = (int)((p - atts) >> 1);
253 if (nAtts > 1)
254 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
255 while (*atts) {
256 name = *atts++;
257 sep = tcsrchr(name, NSSEP);
258 puttc(T(' '), fp);
259 if (sep) {
260 ftprintf(fp, T("n%d:"), nsi);
261 fputts(sep + 1, fp);
262 } else
263 fputts(name, fp);
264 attributeValue(fp, *atts);
265 if (sep) {
266 ftprintf(fp, T(" xmlns:n%d"), nsi++);
267 attributeValue(fp, name);
268 }
269 atts++;
270 }
271 puttc(T('>'), fp);
272 }
273
274 static void XMLCALL
endElementNS(void * userData,const XML_Char * name)275 endElementNS(void *userData, const XML_Char *name) {
276 FILE *fp = ((XmlwfUserData *)userData)->fp;
277 const XML_Char *sep;
278 puttc(T('<'), fp);
279 puttc(T('/'), fp);
280 sep = tcsrchr(name, NSSEP);
281 if (sep) {
282 fputts(T("n1:"), fp);
283 fputts(sep + 1, fp);
284 } else
285 fputts(name, fp);
286 puttc(T('>'), fp);
287 }
288
289 #ifndef W3C14N
290
291 static void XMLCALL
processingInstruction(void * userData,const XML_Char * target,const XML_Char * data)292 processingInstruction(void *userData, const XML_Char *target,
293 const XML_Char *data) {
294 FILE *fp = ((XmlwfUserData *)userData)->fp;
295 puttc(T('<'), fp);
296 puttc(T('?'), fp);
297 fputts(target, fp);
298 puttc(T(' '), fp);
299 fputts(data, fp);
300 puttc(T('?'), fp);
301 puttc(T('>'), fp);
302 }
303
304 static XML_Char *
xcsdup(const XML_Char * s)305 xcsdup(const XML_Char *s) {
306 XML_Char *result;
307 int count = 0;
308 size_t numBytes;
309
310 /* Get the length of the string, including terminator */
311 while (s[count++] != 0) {
312 /* Do nothing */
313 }
314 numBytes = count * sizeof(XML_Char);
315 result = malloc(numBytes);
316 if (result == NULL)
317 return NULL;
318 memcpy(result, s, numBytes);
319 return result;
320 }
321
322 static void XMLCALL
startDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * publid,int has_internal_subset)323 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
324 const XML_Char *sysid, const XML_Char *publid,
325 int has_internal_subset) {
326 XmlwfUserData *data = (XmlwfUserData *)userData;
327 UNUSED_P(sysid);
328 UNUSED_P(publid);
329 UNUSED_P(has_internal_subset);
330 data->currentDoctypeName = xcsdup(doctypeName);
331 }
332
333 static void
freeNotations(XmlwfUserData * data)334 freeNotations(XmlwfUserData *data) {
335 NotationList *notationListHead = data->notationListHead;
336
337 while (notationListHead != NULL) {
338 NotationList *next = notationListHead->next;
339 free((void *)notationListHead->notationName);
340 free((void *)notationListHead->systemId);
341 free((void *)notationListHead->publicId);
342 free(notationListHead);
343 notationListHead = next;
344 }
345 data->notationListHead = NULL;
346 }
347
348 static void
cleanupUserData(XmlwfUserData * userData)349 cleanupUserData(XmlwfUserData *userData) {
350 free((void *)userData->currentDoctypeName);
351 userData->currentDoctypeName = NULL;
352 freeNotations(userData);
353 }
354
355 static int
xcscmp(const XML_Char * xs,const XML_Char * xt)356 xcscmp(const XML_Char *xs, const XML_Char *xt) {
357 while (*xs != 0 && *xt != 0) {
358 if (*xs < *xt)
359 return -1;
360 if (*xs > *xt)
361 return 1;
362 xs++;
363 xt++;
364 }
365 if (*xs < *xt)
366 return -1;
367 if (*xs > *xt)
368 return 1;
369 return 0;
370 }
371
372 static int
notationCmp(const void * a,const void * b)373 notationCmp(const void *a, const void *b) {
374 const NotationList *const n1 = *(const NotationList *const *)a;
375 const NotationList *const n2 = *(const NotationList *const *)b;
376
377 return xcscmp(n1->notationName, n2->notationName);
378 }
379
380 static void XMLCALL
endDoctypeDecl(void * userData)381 endDoctypeDecl(void *userData) {
382 XmlwfUserData *data = (XmlwfUserData *)userData;
383 NotationList **notations;
384 int notationCount = 0;
385 NotationList *p;
386 int i;
387
388 /* How many notations do we have? */
389 for (p = data->notationListHead; p != NULL; p = p->next)
390 notationCount++;
391 if (notationCount == 0) {
392 /* Nothing to report */
393 free((void *)data->currentDoctypeName);
394 data->currentDoctypeName = NULL;
395 return;
396 }
397
398 notations = malloc(notationCount * sizeof(NotationList *));
399 if (notations == NULL) {
400 fprintf(stderr, "Unable to sort notations");
401 freeNotations(data);
402 return;
403 }
404
405 for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
406 notations[i] = p;
407 }
408 qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
409
410 /* Output the DOCTYPE header */
411 fputts(T("<!DOCTYPE "), data->fp);
412 fputts(data->currentDoctypeName, data->fp);
413 fputts(T(" [\n"), data->fp);
414
415 /* Now the NOTATIONs */
416 for (i = 0; i < notationCount; i++) {
417 fputts(T("<!NOTATION "), data->fp);
418 fputts(notations[i]->notationName, data->fp);
419 if (notations[i]->publicId != NULL) {
420 fputts(T(" PUBLIC '"), data->fp);
421 fputts(notations[i]->publicId, data->fp);
422 puttc(T('\''), data->fp);
423 if (notations[i]->systemId != NULL) {
424 puttc(T(' '), data->fp);
425 puttc(T('\''), data->fp);
426 fputts(notations[i]->systemId, data->fp);
427 puttc(T('\''), data->fp);
428 }
429 } else if (notations[i]->systemId != NULL) {
430 fputts(T(" SYSTEM '"), data->fp);
431 fputts(notations[i]->systemId, data->fp);
432 puttc(T('\''), data->fp);
433 }
434 puttc(T('>'), data->fp);
435 puttc(T('\n'), data->fp);
436 }
437
438 /* Finally end the DOCTYPE */
439 fputts(T("]>\n"), data->fp);
440
441 free(notations);
442 freeNotations(data);
443 free((void *)data->currentDoctypeName);
444 data->currentDoctypeName = NULL;
445 }
446
447 static void XMLCALL
notationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)448 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
449 const XML_Char *systemId, const XML_Char *publicId) {
450 XmlwfUserData *data = (XmlwfUserData *)userData;
451 NotationList *entry = malloc(sizeof(NotationList));
452 const char *errorMessage = "Unable to store NOTATION for output\n";
453
454 UNUSED_P(base);
455 if (entry == NULL) {
456 fputs(errorMessage, stderr);
457 return; /* Nothing we can really do about this */
458 }
459 entry->notationName = xcsdup(notationName);
460 if (entry->notationName == NULL) {
461 fputs(errorMessage, stderr);
462 free(entry);
463 return;
464 }
465 if (systemId != NULL) {
466 entry->systemId = xcsdup(systemId);
467 if (entry->systemId == NULL) {
468 fputs(errorMessage, stderr);
469 free((void *)entry->notationName);
470 free(entry);
471 return;
472 }
473 } else {
474 entry->systemId = NULL;
475 }
476 if (publicId != NULL) {
477 entry->publicId = xcsdup(publicId);
478 if (entry->publicId == NULL) {
479 fputs(errorMessage, stderr);
480 free((void *)entry->systemId); /* Safe if it's NULL */
481 free((void *)entry->notationName);
482 free(entry);
483 return;
484 }
485 } else {
486 entry->publicId = NULL;
487 }
488
489 entry->next = data->notationListHead;
490 data->notationListHead = entry;
491 }
492
493 #endif /* not W3C14N */
494
495 static void XMLCALL
defaultCharacterData(void * userData,const XML_Char * s,int len)496 defaultCharacterData(void *userData, const XML_Char *s, int len) {
497 UNUSED_P(s);
498 UNUSED_P(len);
499 XML_DefaultCurrent((XML_Parser)userData);
500 }
501
502 static void XMLCALL
defaultStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)503 defaultStartElement(void *userData, const XML_Char *name,
504 const XML_Char **atts) {
505 UNUSED_P(name);
506 UNUSED_P(atts);
507 XML_DefaultCurrent((XML_Parser)userData);
508 }
509
510 static void XMLCALL
defaultEndElement(void * userData,const XML_Char * name)511 defaultEndElement(void *userData, const XML_Char *name) {
512 UNUSED_P(name);
513 XML_DefaultCurrent((XML_Parser)userData);
514 }
515
516 static void XMLCALL
defaultProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)517 defaultProcessingInstruction(void *userData, const XML_Char *target,
518 const XML_Char *data) {
519 UNUSED_P(target);
520 UNUSED_P(data);
521 XML_DefaultCurrent((XML_Parser)userData);
522 }
523
524 static void XMLCALL
nopCharacterData(void * userData,const XML_Char * s,int len)525 nopCharacterData(void *userData, const XML_Char *s, int len) {
526 UNUSED_P(userData);
527 UNUSED_P(s);
528 UNUSED_P(len);
529 }
530
531 static void XMLCALL
nopStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)532 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
533 UNUSED_P(userData);
534 UNUSED_P(name);
535 UNUSED_P(atts);
536 }
537
538 static void XMLCALL
nopEndElement(void * userData,const XML_Char * name)539 nopEndElement(void *userData, const XML_Char *name) {
540 UNUSED_P(userData);
541 UNUSED_P(name);
542 }
543
544 static void XMLCALL
nopProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)545 nopProcessingInstruction(void *userData, const XML_Char *target,
546 const XML_Char *data) {
547 UNUSED_P(userData);
548 UNUSED_P(target);
549 UNUSED_P(data);
550 }
551
552 static void XMLCALL
markup(void * userData,const XML_Char * s,int len)553 markup(void *userData, const XML_Char *s, int len) {
554 FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
555 for (; len > 0; --len, ++s)
556 puttc(*s, fp);
557 }
558
559 static void
metaLocation(XML_Parser parser)560 metaLocation(XML_Parser parser) {
561 const XML_Char *uri = XML_GetBase(parser);
562 FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
563 if (uri)
564 ftprintf(fp, T(" uri=\"%s\""), uri);
565 ftprintf(fp,
566 T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
567 T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
568 T(XML_FMT_INT_MOD) T("u\""),
569 XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
570 XML_GetCurrentLineNumber(parser),
571 XML_GetCurrentColumnNumber(parser));
572 }
573
574 static void
metaStartDocument(void * userData)575 metaStartDocument(void *userData) {
576 fputts(T("<document>\n"),
577 ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
578 }
579
580 static void
metaEndDocument(void * userData)581 metaEndDocument(void *userData) {
582 fputts(T("</document>\n"),
583 ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
584 }
585
586 static void XMLCALL
metaStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)587 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
588 XML_Parser parser = (XML_Parser)userData;
589 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
590 FILE *fp = data->fp;
591 const XML_Char **specifiedAttsEnd
592 = atts + XML_GetSpecifiedAttributeCount(parser);
593 const XML_Char **idAttPtr;
594 int idAttIndex = XML_GetIdAttributeIndex(parser);
595 if (idAttIndex < 0)
596 idAttPtr = 0;
597 else
598 idAttPtr = atts + idAttIndex;
599
600 ftprintf(fp, T("<starttag name=\"%s\""), name);
601 metaLocation(parser);
602 if (*atts) {
603 fputts(T(">\n"), fp);
604 do {
605 ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
606 characterData(data, atts[1], (int)tcslen(atts[1]));
607 if (atts >= specifiedAttsEnd)
608 fputts(T("\" defaulted=\"yes\"/>\n"), fp);
609 else if (atts == idAttPtr)
610 fputts(T("\" id=\"yes\"/>\n"), fp);
611 else
612 fputts(T("\"/>\n"), fp);
613 } while (*(atts += 2));
614 fputts(T("</starttag>\n"), fp);
615 } else
616 fputts(T("/>\n"), fp);
617 }
618
619 static void XMLCALL
metaEndElement(void * userData,const XML_Char * name)620 metaEndElement(void *userData, const XML_Char *name) {
621 XML_Parser parser = (XML_Parser)userData;
622 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
623 FILE *fp = data->fp;
624 ftprintf(fp, T("<endtag name=\"%s\""), name);
625 metaLocation(parser);
626 fputts(T("/>\n"), fp);
627 }
628
629 static void XMLCALL
metaProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)630 metaProcessingInstruction(void *userData, const XML_Char *target,
631 const XML_Char *data) {
632 XML_Parser parser = (XML_Parser)userData;
633 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
634 FILE *fp = usrData->fp;
635 ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
636 characterData(usrData, data, (int)tcslen(data));
637 puttc(T('"'), fp);
638 metaLocation(parser);
639 fputts(T("/>\n"), fp);
640 }
641
642 static void XMLCALL
metaComment(void * userData,const XML_Char * data)643 metaComment(void *userData, const XML_Char *data) {
644 XML_Parser parser = (XML_Parser)userData;
645 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
646 FILE *fp = usrData->fp;
647 fputts(T("<comment data=\""), fp);
648 characterData(usrData, data, (int)tcslen(data));
649 puttc(T('"'), fp);
650 metaLocation(parser);
651 fputts(T("/>\n"), fp);
652 }
653
654 static void XMLCALL
metaStartCdataSection(void * userData)655 metaStartCdataSection(void *userData) {
656 XML_Parser parser = (XML_Parser)userData;
657 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
658 FILE *fp = data->fp;
659 fputts(T("<startcdata"), fp);
660 metaLocation(parser);
661 fputts(T("/>\n"), fp);
662 }
663
664 static void XMLCALL
metaEndCdataSection(void * userData)665 metaEndCdataSection(void *userData) {
666 XML_Parser parser = (XML_Parser)userData;
667 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
668 FILE *fp = data->fp;
669 fputts(T("<endcdata"), fp);
670 metaLocation(parser);
671 fputts(T("/>\n"), fp);
672 }
673
674 static void XMLCALL
metaCharacterData(void * userData,const XML_Char * s,int len)675 metaCharacterData(void *userData, const XML_Char *s, int len) {
676 XML_Parser parser = (XML_Parser)userData;
677 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
678 FILE *fp = data->fp;
679 fputts(T("<chars str=\""), fp);
680 characterData(data, s, len);
681 puttc(T('"'), fp);
682 metaLocation(parser);
683 fputts(T("/>\n"), fp);
684 }
685
686 static void XMLCALL
metaStartDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)687 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
688 const XML_Char *sysid, const XML_Char *pubid,
689 int has_internal_subset) {
690 XML_Parser parser = (XML_Parser)userData;
691 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
692 FILE *fp = data->fp;
693 UNUSED_P(sysid);
694 UNUSED_P(pubid);
695 UNUSED_P(has_internal_subset);
696 ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
697 metaLocation(parser);
698 fputts(T("/>\n"), fp);
699 }
700
701 static void XMLCALL
metaEndDoctypeDecl(void * userData)702 metaEndDoctypeDecl(void *userData) {
703 XML_Parser parser = (XML_Parser)userData;
704 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
705 FILE *fp = data->fp;
706 fputts(T("<enddoctype"), fp);
707 metaLocation(parser);
708 fputts(T("/>\n"), fp);
709 }
710
711 static void XMLCALL
metaNotationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)712 metaNotationDecl(void *userData, const XML_Char *notationName,
713 const XML_Char *base, const XML_Char *systemId,
714 const XML_Char *publicId) {
715 XML_Parser parser = (XML_Parser)userData;
716 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
717 FILE *fp = data->fp;
718 UNUSED_P(base);
719 ftprintf(fp, T("<notation name=\"%s\""), notationName);
720 if (publicId)
721 ftprintf(fp, T(" public=\"%s\""), publicId);
722 if (systemId) {
723 fputts(T(" system=\""), fp);
724 characterData(data, systemId, (int)tcslen(systemId));
725 puttc(T('"'), fp);
726 }
727 metaLocation(parser);
728 fputts(T("/>\n"), fp);
729 }
730
731 static void XMLCALL
metaEntityDecl(void * userData,const XML_Char * entityName,int is_param,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)732 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
733 const XML_Char *value, int value_length, const XML_Char *base,
734 const XML_Char *systemId, const XML_Char *publicId,
735 const XML_Char *notationName) {
736 XML_Parser parser = (XML_Parser)userData;
737 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
738 FILE *fp = data->fp;
739
740 UNUSED_P(is_param);
741 UNUSED_P(base);
742 if (value) {
743 ftprintf(fp, T("<entity name=\"%s\""), entityName);
744 metaLocation(parser);
745 puttc(T('>'), fp);
746 characterData(data, value, value_length);
747 fputts(T("</entity/>\n"), fp);
748 } else if (notationName) {
749 ftprintf(fp, T("<entity name=\"%s\""), entityName);
750 if (publicId)
751 ftprintf(fp, T(" public=\"%s\""), publicId);
752 fputts(T(" system=\""), fp);
753 characterData(data, systemId, (int)tcslen(systemId));
754 puttc(T('"'), fp);
755 ftprintf(fp, T(" notation=\"%s\""), notationName);
756 metaLocation(parser);
757 fputts(T("/>\n"), fp);
758 } else {
759 ftprintf(fp, T("<entity name=\"%s\""), entityName);
760 if (publicId)
761 ftprintf(fp, T(" public=\"%s\""), publicId);
762 fputts(T(" system=\""), fp);
763 characterData(data, systemId, (int)tcslen(systemId));
764 puttc(T('"'), fp);
765 metaLocation(parser);
766 fputts(T("/>\n"), fp);
767 }
768 }
769
770 static void XMLCALL
metaStartNamespaceDecl(void * userData,const XML_Char * prefix,const XML_Char * uri)771 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
772 const XML_Char *uri) {
773 XML_Parser parser = (XML_Parser)userData;
774 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
775 FILE *fp = data->fp;
776 fputts(T("<startns"), fp);
777 if (prefix)
778 ftprintf(fp, T(" prefix=\"%s\""), prefix);
779 if (uri) {
780 fputts(T(" ns=\""), fp);
781 characterData(data, uri, (int)tcslen(uri));
782 fputts(T("\"/>\n"), fp);
783 } else
784 fputts(T("/>\n"), fp);
785 }
786
787 static void XMLCALL
metaEndNamespaceDecl(void * userData,const XML_Char * prefix)788 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
789 XML_Parser parser = (XML_Parser)userData;
790 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
791 FILE *fp = data->fp;
792 if (! prefix)
793 fputts(T("<endns/>\n"), fp);
794 else
795 ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
796 }
797
798 static int XMLCALL
unknownEncodingConvert(void * data,const char * p)799 unknownEncodingConvert(void *data, const char *p) {
800 return codepageConvert(*(int *)data, p);
801 }
802
803 static int XMLCALL
unknownEncoding(void * userData,const XML_Char * name,XML_Encoding * info)804 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
805 int cp;
806 static const XML_Char prefixL[] = T("windows-");
807 static const XML_Char prefixU[] = T("WINDOWS-");
808 int i;
809
810 UNUSED_P(userData);
811 for (i = 0; prefixU[i]; i++)
812 if (name[i] != prefixU[i] && name[i] != prefixL[i])
813 return 0;
814
815 cp = 0;
816 for (; name[i]; i++) {
817 static const XML_Char digits[] = T("0123456789");
818 const XML_Char *s = tcschr(digits, name[i]);
819 if (! s)
820 return 0;
821 cp *= 10;
822 cp += (int)(s - digits);
823 if (cp >= 0x10000)
824 return 0;
825 }
826 if (! codepageMap(cp, info->map))
827 return 0;
828 info->convert = unknownEncodingConvert;
829 /* We could just cast the code page integer to a void *,
830 and avoid the use of release. */
831 info->release = free;
832 info->data = malloc(sizeof(int));
833 if (! info->data)
834 return 0;
835 *(int *)info->data = cp;
836 return 1;
837 }
838
839 static int XMLCALL
notStandalone(void * userData)840 notStandalone(void *userData) {
841 UNUSED_P(userData);
842 return 0;
843 }
844
845 static void
showVersion(XML_Char * prog)846 showVersion(XML_Char *prog) {
847 XML_Char *s = prog;
848 XML_Char ch;
849 const XML_Feature *features = XML_GetFeatureList();
850 while ((ch = *s) != 0) {
851 if (ch == '/'
852 #if defined(_WIN32)
853 || ch == '\\'
854 #endif
855 )
856 prog = s + 1;
857 ++s;
858 }
859 ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
860 if (features != NULL && features[0].feature != XML_FEATURE_END) {
861 int i = 1;
862 ftprintf(stdout, T("%s"), features[0].name);
863 if (features[0].value)
864 ftprintf(stdout, T("=%ld"), features[0].value);
865 while (features[i].feature != XML_FEATURE_END) {
866 ftprintf(stdout, T(", %s"), features[i].name);
867 if (features[i].value)
868 ftprintf(stdout, T("=%ld"), features[i].value);
869 ++i;
870 }
871 ftprintf(stdout, T("\n"));
872 }
873 }
874
/* Prints the help text to stderr, substituting PROG for each of the three
   %s placeholders in the synopsis, and then terminates the process with
   exit status RC.  This function does not return. */
#if defined(__GNUC__)
__attribute__((noreturn))
#endif
static void
usage(const XML_Char *prog, int rc) {
  ftprintf(
      stderr,
      /* Generated with:
       *   $ xmlwf/xmlwf_helpgen.sh
       * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
       * xmlwf/xmlwf_helpgen.sh in here.
       */
      /* clang-format off */
      T("usage:\n")
      T(" %s [OPTIONS] [FILE ...]\n")
      T(" %s -h|--help\n")
      T(" %s -v|--version\n")
      T("\n")
      T("xmlwf - Determines if an XML document is well-formed\n")
      T("\n")
      T("positional arguments:\n")
      T(" FILE file to process (default: STDIN)\n")
      T("\n")
      T("input control arguments:\n")
      T(" -s print an error if the document is not [s]tandalone\n")
      T(" -n enable [n]amespace processing\n")
      T(" -p enable processing of external DTDs and [p]arameter entities\n")
      T(" -x enable processing of e[x]ternal entities\n")
      T(" -e ENCODING override any in-document [e]ncoding declaration\n")
      T(" -w enable support for [W]indows code pages\n")
      T(" -r disable memory-mapping and use [r]ead calls instead\n")
      T(" -g BYTES buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
      T(" -k when processing multiple files, [k]eep processing after first file with error\n")
      T("\n")
      T("output control arguments:\n")
      T(" -d DIRECTORY output [d]estination directory\n")
      T(" -c write a [c]opy of input XML, not canonical XML\n")
      T(" -m write [m]eta XML, not canonical XML\n")
      T(" -t write no XML output for [t]iming of plain parsing\n")
      T(" -N enable adding doctype and [n]otation declarations\n")
      T("\n")
      T("amplification attack protection (e.g. billion laughs):\n")
      T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
      T("\n")
      T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
      T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n")
      T("\n")
      T("reparse deferral:\n")
      T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
      T("\n")
      T("info arguments:\n")
      T(" -h, --help show this [h]elp message and exit\n")
      T(" -v, --version show program's [v]ersion number and exit\n")
      T("\n")
      T("environment variables:\n")
      T(" EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)\n")
      T(" Control verbosity of accounting debugging (default: 0)\n")
      T(" EXPAT_ENTITY_DEBUG=(0|1)\n")
      T(" Control verbosity of entity debugging (default: 0)\n")
      T(" EXPAT_ENTROPY_DEBUG=(0|1)\n")
      T(" Control verbosity of entropy debugging (default: 0)\n")
      T(" EXPAT_MALLOC_DEBUG=(0|1|2)\n")
      T(" Control verbosity of allocation tracker (default: 0)\n")
      T("\n")
      T("exit status:\n")
      T(" 0 the input files are well-formed and the output (if requested) was written successfully\n")
      T(" 1 could not allocate data structures, signals a serious problem with execution environment\n")
      T(" 2 one or more input files were not well-formed\n")
      T(" 3 could not create an output file\n")
      T(" 4 command-line argument error\n")
      T("\n")
      T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
      T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
      , /* clang-format on */
      prog, prog, prog);
  exit(rc);
}
952
#if defined(__MINGW32__) && defined(XML_UNICODE)
/* Declared up front only to silence the missing-prototype warning for the
   wide-character entry point on this toolchain. */
int wmain(int argc, XML_Char *argv[]);
#endif
957
/* Fetches the argument of the option character at argv[i][j] and stores a
   pointer to it in constCharStarTarget.  If text follows the option
   character in the same word ("-dDIR") that text is the argument;
   otherwise the next word is used, and usage() is called with
   XMLWF_EXIT_USAGE_ERROR when there is none.  Afterwards i refers to the
   word after the argument and j is reset to 0.

   i and j are modified and must be lvalues.  The expansion is a single
   statement (do { } while (0)), so the invocation must be followed by a
   semicolon and is safe inside unbraced if/else bodies. */
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j)            \
  do {                                                                         \
    if ((argv)[(i)][(j) + 1] == T('\0')) {                                     \
      if (++(i) == (argc)) {                                                   \
        usage((argv)[0], XMLWF_EXIT_USAGE_ERROR);                              \
        /* usage called exit(..), never gets here */                           \
      }                                                                        \
      (constCharStarTarget) = (argv)[(i)];                                     \
    } else {                                                                   \
      (constCharStarTarget) = (argv)[(i)] + (j) + 1;                           \
    }                                                                          \
    (i)++;                                                                     \
    (j) = 0;                                                                   \
  } while (0)
972
973 int
tmain(int argc,XML_Char ** argv)974 tmain(int argc, XML_Char **argv) {
975 int i, j;
976 const XML_Char *outputDir = NULL;
977 const XML_Char *encoding = NULL;
978 unsigned processFlags = XML_MAP_FILE;
979 int windowsCodePages = 0;
980 int outputType = 0;
981 int useNamespaces = 0;
982 int requireStandalone = 0;
983 int requiresNotations = 0;
984 int continueOnError = 0;
985
986 float attackMaximumAmplification = -1.0f; /* signaling "not set" */
987 unsigned long long attackThresholdBytes = 0;
988 XML_Bool attackThresholdGiven = XML_FALSE;
989
990 XML_Bool disableDeferral = XML_FALSE;
991
992 int exitCode = XMLWF_EXIT_SUCCESS;
993 enum XML_ParamEntityParsing paramEntityParsing
994 = XML_PARAM_ENTITY_PARSING_NEVER;
995 int useStdin = 0;
996 XmlwfUserData userData = {NULL, NULL, NULL};
997
998 #ifdef _MSC_VER
999 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
1000 #endif
1001
1002 i = 1;
1003 j = 0;
1004 while (i < argc) {
1005 if (j == 0) {
1006 if (argv[i][0] != T('-'))
1007 break;
1008 if (argv[i][1] == T('-')) {
1009 if (argv[i][2] == T('\0')) {
1010 i++;
1011 break;
1012 } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
1013 usage(argv[0], XMLWF_EXIT_SUCCESS);
1014 // usage called exit(..), never gets here
1015 } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
1016 showVersion(argv[0]);
1017 return XMLWF_EXIT_SUCCESS;
1018 }
1019 }
1020 j++;
1021 }
1022 switch (argv[i][j]) {
1023 case T('r'):
1024 processFlags &= ~XML_MAP_FILE;
1025 j++;
1026 break;
1027 case T('s'):
1028 requireStandalone = 1;
1029 j++;
1030 break;
1031 case T('n'):
1032 useNamespaces = 1;
1033 j++;
1034 break;
1035 case T('p'):
1036 paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
1037 /* fall through */
1038 case T('x'):
1039 processFlags |= XML_EXTERNAL_ENTITIES;
1040 j++;
1041 break;
1042 case T('w'):
1043 windowsCodePages = 1;
1044 j++;
1045 break;
1046 case T('m'):
1047 outputType = 'm';
1048 j++;
1049 break;
1050 case T('c'):
1051 outputType = 'c';
1052 useNamespaces = 0;
1053 j++;
1054 break;
1055 case T('t'):
1056 outputType = 't';
1057 j++;
1058 break;
1059 case T('N'):
1060 requiresNotations = 1;
1061 j++;
1062 break;
1063 case T('d'):
1064 XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
1065 break;
1066 case T('e'):
1067 XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
1068 break;
1069 case T('h'):
1070 usage(argv[0], XMLWF_EXIT_SUCCESS);
1071 // usage called exit(..), never gets here
1072 case T('v'):
1073 showVersion(argv[0]);
1074 return XMLWF_EXIT_SUCCESS;
1075 case T('g'): {
1076 const XML_Char *valueText = NULL;
1077 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1078
1079 errno = 0;
1080 XML_Char *afterValueText = (XML_Char *)valueText;
1081 const long long read_size_bytes_candidate
1082 = tcstoull(valueText, &afterValueText, 10);
1083 if ((errno != 0) || (afterValueText[0] != T('\0'))
1084 || (read_size_bytes_candidate < 1)
1085 || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
1086 // This prevents tperror(..) from reporting misleading "[..]: Success"
1087 errno = ERANGE;
1088 tperror(T("invalid buffer size") T(
1089 " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
1090 exit(XMLWF_EXIT_USAGE_ERROR);
1091 }
1092 g_read_size_bytes = (int)read_size_bytes_candidate;
1093 break;
1094 }
1095 case T('k'):
1096 continueOnError = 1;
1097 j++;
1098 break;
1099 case T('a'): {
1100 const XML_Char *valueText = NULL;
1101 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1102
1103 errno = 0;
1104 XML_Char *afterValueText = NULL;
1105 attackMaximumAmplification = tcstof(valueText, &afterValueText);
1106 if ((errno != 0) || (afterValueText[0] != T('\0'))
1107 || isnan(attackMaximumAmplification)
1108 || (attackMaximumAmplification < 1.0f)) {
1109 // This prevents tperror(..) from reporting misleading "[..]: Success"
1110 errno = ERANGE;
1111 tperror(T("invalid amplification limit") T(
1112 " (needs a floating point number greater or equal than 1.0)"));
1113 exit(XMLWF_EXIT_USAGE_ERROR);
1114 }
1115 #if XML_GE == 0
1116 ftprintf(stderr,
1117 T("Warning: Given amplification limit ignored")
1118 T(", xmlwf has been compiled without DTD/GE support.\n"));
1119 #endif
1120 break;
1121 }
1122 case T('b'): {
1123 const XML_Char *valueText = NULL;
1124 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1125
1126 errno = 0;
1127 XML_Char *afterValueText = (XML_Char *)valueText;
1128 attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
1129 if ((errno != 0) || (afterValueText[0] != T('\0'))) {
1130 // This prevents tperror(..) from reporting misleading "[..]: Success"
1131 errno = ERANGE;
1132 tperror(T("invalid ignore threshold")
1133 T(" (needs an integer from 0 to 2^64-1)"));
1134 exit(XMLWF_EXIT_USAGE_ERROR);
1135 }
1136 attackThresholdGiven = XML_TRUE;
1137 #if XML_GE == 0
1138 ftprintf(stderr,
1139 T("Warning: Given attack threshold ignored")
1140 T(", xmlwf has been compiled without DTD/GE support.\n"));
1141 #endif
1142 break;
1143 }
1144 case T('q'): {
1145 disableDeferral = XML_TRUE;
1146 j++;
1147 break;
1148 }
1149 case T('\0'):
1150 if (j > 1) {
1151 i++;
1152 j = 0;
1153 break;
1154 }
1155 /* fall through */
1156 default:
1157 usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
1158 // usage called exit(..), never gets here
1159 }
1160 }
1161 if (i == argc) {
1162 useStdin = 1;
1163 processFlags &= ~XML_MAP_FILE;
1164 i--;
1165 }
1166 for (; i < argc; i++) {
1167 XML_Char *outName = 0;
1168 int result;
1169 XML_Parser parser;
1170 if (useNamespaces)
1171 parser = XML_ParserCreateNS(encoding, NSSEP);
1172 else
1173 parser = XML_ParserCreate(encoding);
1174
1175 if (! parser) {
1176 tperror(T("Could not instantiate parser"));
1177 exit(XMLWF_EXIT_INTERNAL_ERROR);
1178 }
1179
1180 if (attackMaximumAmplification != -1.0f) {
1181 #if XML_GE == 1
1182 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
1183 parser, attackMaximumAmplification);
1184 XML_SetAllocTrackerMaximumAmplification(parser,
1185 attackMaximumAmplification);
1186 #endif
1187 }
1188 if (attackThresholdGiven) {
1189 #if XML_GE == 1
1190 XML_SetBillionLaughsAttackProtectionActivationThreshold(
1191 parser, attackThresholdBytes);
1192 XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes);
1193 #else
1194 (void)attackThresholdBytes; // silence -Wunused-but-set-variable
1195 #endif
1196 }
1197
1198 if (disableDeferral) {
1199 const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
1200 if (! success) {
1201 // This prevents tperror(..) from reporting misleading "[..]: Success"
1202 errno = EINVAL;
1203 tperror(T("Failed to disable reparse deferral"));
1204 exit(XMLWF_EXIT_INTERNAL_ERROR);
1205 }
1206 }
1207
1208 if (requireStandalone)
1209 XML_SetNotStandaloneHandler(parser, notStandalone);
1210 XML_SetParamEntityParsing(parser, paramEntityParsing);
1211 if (outputType == 't') {
1212 /* This is for doing timings; this gives a more realistic estimate of
1213 the parsing time. */
1214 outputDir = 0;
1215 XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1216 XML_SetCharacterDataHandler(parser, nopCharacterData);
1217 XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1218 } else if (outputDir) {
1219 const XML_Char *delim = T("/");
1220 const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1221 if (! useStdin) {
1222 /* Jump after last (back)slash */
1223 const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1224 if (lastDelim)
1225 file = lastDelim + 1;
1226 #if defined(_WIN32)
1227 else {
1228 const XML_Char *winDelim = T("\\");
1229 lastDelim = tcsrchr(file, winDelim[0]);
1230 if (lastDelim) {
1231 file = lastDelim + 1;
1232 delim = winDelim;
1233 }
1234 }
1235 #endif
1236 }
1237 outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
1238 * sizeof(XML_Char));
1239 if (! outName) {
1240 tperror(T("Could not allocate memory"));
1241 exit(XMLWF_EXIT_INTERNAL_ERROR);
1242 }
1243 tcscpy(outName, outputDir);
1244 tcscat(outName, delim);
1245 tcscat(outName, file);
1246 userData.fp = tfopen(outName, T("wb"));
1247 if (! userData.fp) {
1248 tperror(outName);
1249 exitCode = XMLWF_EXIT_OUTPUT_ERROR;
1250 free(outName);
1251 XML_ParserFree(parser);
1252 if (continueOnError) {
1253 continue;
1254 } else {
1255 break;
1256 }
1257 }
1258 setvbuf(userData.fp, NULL, _IOFBF, 16384);
1259 #ifdef XML_UNICODE
1260 puttc(0xFEFF, userData.fp);
1261 #endif
1262 XML_SetUserData(parser, &userData);
1263 switch (outputType) {
1264 case 'm':
1265 XML_UseParserAsHandlerArg(parser);
1266 XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1267 XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1268 XML_SetCommentHandler(parser, metaComment);
1269 XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1270 metaEndCdataSection);
1271 XML_SetCharacterDataHandler(parser, metaCharacterData);
1272 XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1273 metaEndDoctypeDecl);
1274 XML_SetEntityDeclHandler(parser, metaEntityDecl);
1275 XML_SetNotationDeclHandler(parser, metaNotationDecl);
1276 XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1277 metaEndNamespaceDecl);
1278 metaStartDocument(parser);
1279 break;
1280 case 'c':
1281 XML_UseParserAsHandlerArg(parser);
1282 XML_SetDefaultHandler(parser, markup);
1283 XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1284 XML_SetCharacterDataHandler(parser, defaultCharacterData);
1285 XML_SetProcessingInstructionHandler(parser,
1286 defaultProcessingInstruction);
1287 break;
1288 default:
1289 if (useNamespaces)
1290 XML_SetElementHandler(parser, startElementNS, endElementNS);
1291 else
1292 XML_SetElementHandler(parser, startElement, endElement);
1293 XML_SetCharacterDataHandler(parser, characterData);
1294 #ifndef W3C14N
1295 XML_SetProcessingInstructionHandler(parser, processingInstruction);
1296 if (requiresNotations) {
1297 XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1298 XML_SetNotationDeclHandler(parser, notationDecl);
1299 }
1300 #endif /* not W3C14N */
1301 break;
1302 }
1303 }
1304 if (windowsCodePages)
1305 XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1306 result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1307 if (outputDir) {
1308 if (outputType == 'm')
1309 metaEndDocument(parser);
1310 fclose(userData.fp);
1311 if (! result) {
1312 tremove(outName);
1313 }
1314 free(outName);
1315 }
1316 XML_ParserFree(parser);
1317 if (! result) {
1318 exitCode = XMLWF_EXIT_NOT_WELLFORMED;
1319 cleanupUserData(&userData);
1320 if (! continueOnError) {
1321 break;
1322 }
1323 }
1324 }
1325 return exitCode;
1326 }
1327