1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 14 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> 15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 17 Copyright (c) 2020 Joe Orton <jorton@redhat.com> 18 Copyright (c) 2020 Kleber Tarcísio <klebertarcisio@yahoo.com.br> 19 Copyright (c) 2021 Tim Bray <tbray@textuality.com> 20 Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com> 21 Copyright (c) 2022 Sean McBride <sean@rogue-research.com> 22 Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> 23 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez@gmail.com> 24 Copyright (c) 2026 Nick Begg <nick@stunttruck.net> 25 Copyright (c) 2026 Kartik Kenchi <netliomax25@gmail.com> 26 Licensed under the MIT license: 27 28 Permission is hereby granted, free of charge, to any person obtaining 29 a copy of this software and associated documentation files (the 30 "Software"), to deal in the Software without restriction, including 31 without limitation the rights to use, copy, modify, merge, publish, 32 distribute, sublicense, and/or sell copies of the Software, and to permit 33 persons to whom the Software is furnished to do so, subject to the 34 following conditions: 35 36 The above copyright notice and this permission notice shall be included 37 in all copies or substantial portions of the Software. 38 39 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 40 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 41 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 42 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 43 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 44 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 45 USE OR OTHER DEALINGS IN THE SOFTWARE. 46 */ 47 48 #include "expat_config.h" 49 50 #include <assert.h> 51 #include <stdio.h> 52 #include <stdlib.h> 53 #include <stddef.h> 54 #include <string.h> 55 #include <math.h> /* for isnan */ 56 #include <errno.h> 57 58 #include "expat.h" 59 #include "codepage.h" 60 #include "internal.h" /* for UNUSED_P only */ 61 #include "fallthrough.h" 62 #include "xmlfile.h" 63 #include "xmltchar.h" 64 65 #ifdef _MSC_VER 66 # include <crtdbg.h> 67 #endif 68 69 #ifdef XML_UNICODE 70 # include <wchar.h> 71 #endif 72 73 #include "../lib/xcsinc.c" 74 75 enum ExitCode { 76 XMLWF_EXIT_SUCCESS = 0, 77 XMLWF_EXIT_INTERNAL_ERROR = 1, 78 XMLWF_EXIT_NOT_WELLFORMED = 2, 79 XMLWF_EXIT_OUTPUT_ERROR = 3, 80 XMLWF_EXIT_USAGE_ERROR = 4, 81 }; 82 83 /* Structures for handler user data */ 84 typedef struct NotationList { 85 struct NotationList *next; 86 const XML_Char *notationName; 87 const XML_Char *systemId; 88 const XML_Char *publicId; 89 } NotationList; 90 91 typedef struct xmlwfUserData { 92 FILE *fp; 93 NotationList *notationListHead; 94 const XML_Char *currentDoctypeName; 95 } XmlwfUserData; 96 97 /* This ensures proper sorting. */ 98 99 #define NSSEP T('\001') 100 101 static void XMLCALL 102 characterData(void *userData, const XML_Char *s, int len) { 103 FILE *fp = ((XmlwfUserData *)userData)->fp; 104 for (; len > 0; --len, ++s) { 105 switch (*s) { 106 case T('&'): 107 fputts(T("&"), fp); 108 break; 109 case T('<'): 110 fputts(T("<"), fp); 111 break; 112 case T('>'): 113 fputts(T(">"), fp); 114 break; 115 #ifdef W3C14N 116 case 13: 117 fputts(T("
"), fp); 118 break; 119 #else 120 case T('"'): 121 fputts(T("""), fp); 122 break; 123 case 9: 124 case 10: 125 case 13: 126 ftprintf(fp, T("&#%d;"), *s); 127 break; 128 #endif 129 default: 130 puttc(*s, fp); 131 break; 132 } 133 } 134 } 135 136 static void 137 attributeValue(FILE *fp, const XML_Char *s) { 138 puttc(T('='), fp); 139 puttc(T('"'), fp); 140 assert(s); 141 for (;;) { 142 switch (*s) { 143 case 0: 144 case NSSEP: 145 puttc(T('"'), fp); 146 return; 147 case T('&'): 148 fputts(T("&"), fp); 149 break; 150 case T('<'): 151 fputts(T("<"), fp); 152 break; 153 case T('"'): 154 fputts(T("""), fp); 155 break; 156 #ifdef W3C14N 157 case 9: 158 fputts(T("	"), fp); 159 break; 160 case 10: 161 fputts(T("
"), fp); 162 break; 163 case 13: 164 fputts(T("
"), fp); 165 break; 166 #else 167 case T('>'): 168 fputts(T(">"), fp); 169 break; 170 case 9: 171 case 10: 172 case 13: 173 ftprintf(fp, T("&#%d;"), *s); 174 break; 175 #endif 176 default: 177 puttc(*s, fp); 178 break; 179 } 180 s++; 181 } 182 } 183 184 /* Lexicographically comparing UTF-8 encoded attribute values, 185 is equivalent to lexicographically comparing based on the character number. */ 186 187 static int 188 attcmp(const void *att1, const void *att2) { 189 return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2); 190 } 191 192 static void XMLCALL 193 startElement(void *userData, const XML_Char *name, const XML_Char **atts) { 194 int nAtts; 195 const XML_Char **p; 196 FILE *fp = ((XmlwfUserData *)userData)->fp; 197 puttc(T('<'), fp); 198 fputts(name, fp); 199 200 p = atts; 201 while (*p) 202 ++p; 203 nAtts = (int)((p - atts) >> 1); 204 if (nAtts > 1) 205 qsort(atts, nAtts, sizeof(XML_Char *) * 2, attcmp); 206 while (*atts) { 207 puttc(T(' '), fp); 208 fputts(*atts++, fp); 209 attributeValue(fp, *atts); 210 atts++; 211 } 212 puttc(T('>'), fp); 213 } 214 215 static void XMLCALL 216 endElement(void *userData, const XML_Char *name) { 217 FILE *fp = ((XmlwfUserData *)userData)->fp; 218 puttc(T('<'), fp); 219 puttc(T('/'), fp); 220 fputts(name, fp); 221 puttc(T('>'), fp); 222 } 223 224 static int 225 nsattcmp(const void *p1, const void *p2) { 226 const XML_Char *att1 = *(const XML_Char *const *)p1; 227 const XML_Char *att2 = *(const XML_Char *const *)p2; 228 int sep1 = (tcsrchr(att1, NSSEP) != 0); 229 int sep2 = (tcsrchr(att2, NSSEP) != 0); 230 if (sep1 != sep2) 231 return sep1 - sep2; 232 return tcscmp(att1, att2); 233 } 234 235 static void XMLCALL 236 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) { 237 int nAtts; 238 int nsi; 239 const XML_Char **p; 240 FILE *fp = ((XmlwfUserData *)userData)->fp; 241 const XML_Char *sep; 242 puttc(T('<'), fp); 243 244 sep = tcsrchr(name, NSSEP); 245 if (sep) { 246 fputts(T("n1:"), fp); 247 fputts(sep + 1, fp); 248 fputts(T(" xmlns:n1"), fp); 249 attributeValue(fp, name); 250 nsi = 2; 251 } else { 252 fputts(name, fp); 253 nsi = 1; 254 } 255 256 p = atts; 257 while (*p) 258 ++p; 259 nAtts = (int)((p - atts) >> 1); 260 if (nAtts > 1) 261 qsort(atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp); 262 while (*atts) { 263 name = *atts++; 264 sep = tcsrchr(name, NSSEP); 265 puttc(T(' '), fp); 266 if (sep) { 267 ftprintf(fp, T("n%d:"), nsi); 268 fputts(sep + 1, fp); 269 } else 270 fputts(name, fp); 271 attributeValue(fp, *atts); 272 if (sep) { 273 ftprintf(fp, T(" xmlns:n%d"), nsi++); 274 attributeValue(fp, name); 275 } 276 atts++; 277 } 278 puttc(T('>'), fp); 279 } 280 281 static void XMLCALL 282 endElementNS(void *userData, const XML_Char *name) { 283 FILE *fp = ((XmlwfUserData *)userData)->fp; 284 const XML_Char *sep; 285 puttc(T('<'), fp); 286 puttc(T('/'), fp); 287 sep = tcsrchr(name, NSSEP); 288 if (sep) { 289 fputts(T("n1:"), fp); 290 fputts(sep + 1, fp); 291 } else 292 fputts(name, fp); 293 puttc(T('>'), fp); 294 } 295 296 #ifndef W3C14N 297 298 static void XMLCALL 299 processingInstruction(void *userData, const XML_Char *target, 300 const XML_Char *data) { 301 FILE *fp = ((XmlwfUserData *)userData)->fp; 302 puttc(T('<'), fp); 303 puttc(T('?'), fp); 304 fputts(target, fp); 305 puttc(T(' '), fp); 306 fputts(data, fp); 307 puttc(T('?'), fp); 308 puttc(T('>'), fp); 309 } 310 311 static XML_Char * 312 xcsdup(const XML_Char *s) { 313 const size_t count = xcslen(s) + /* null terminator */ 1; 314 315 // Detect and prevent integer overflow 316 if (count > SIZE_MAX / sizeof(XML_Char)) 317 return NULL; 318 319 const size_t numBytes = count * sizeof(XML_Char); 320 XML_Char *const result = malloc(numBytes); 321 if (result == NULL) 322 return NULL; 323 memcpy(result, s, numBytes); 324 return result; 325 } 326 327 static void XMLCALL 328 startDoctypeDecl(void *userData, const XML_Char *doctypeName, 329 const XML_Char *sysid, const XML_Char *publid, 330 int has_internal_subset) { 331 XmlwfUserData *data = userData; 332 UNUSED_P(sysid); 333 UNUSED_P(publid); 334 UNUSED_P(has_internal_subset); 335 data->currentDoctypeName = xcsdup(doctypeName); 336 } 337 338 static void 339 freeNotations(XmlwfUserData *data) { 340 NotationList *notationListHead = data->notationListHead; 341 342 while (notationListHead != NULL) { 343 NotationList *next = notationListHead->next; 344 free((void *)notationListHead->notationName); 345 free((void *)notationListHead->systemId); 346 free((void *)notationListHead->publicId); 347 free(notationListHead); 348 notationListHead = next; 349 } 350 data->notationListHead = NULL; 351 } 352 353 static void 354 cleanupUserData(XmlwfUserData *userData) { 355 free((void *)userData->currentDoctypeName); 356 userData->currentDoctypeName = NULL; 357 freeNotations(userData); 358 } 359 360 static int 361 xcscmp(const XML_Char *xs, const XML_Char *xt) { 362 while (*xs != 0 && *xt != 0) { 363 if (*xs < *xt) 364 return -1; 365 if (*xs > *xt) 366 return 1; 367 xs++; 368 xt++; 369 } 370 if (*xs < *xt) 371 return -1; 372 if (*xs > *xt) 373 return 1; 374 return 0; 375 } 376 377 static int 378 notationCmp(const void *a, const void *b) { 379 const NotationList *const n1 = *(const NotationList *const *)a; 380 const NotationList *const n2 = *(const NotationList *const *)b; 381 382 return xcscmp(n1->notationName, n2->notationName); 383 } 384 385 /* Write a SystemLiteral/PubidLiteral, choosing a delimiter that does not 386 occur in the value. The grammar forbids a literal from containing its 387 own delimiter, so a value reported by Expat never holds both quote 388 characters and a safe delimiter always exists. */ 389 static void 390 writeLiteral(FILE *fp, const XML_Char *value) { 391 const XML_Char quote = (tcschr(value, T('\'')) != NULL) ? T('"') : T('\''); 392 puttc(quote, fp); 393 fputts(value, fp); 394 puttc(quote, fp); 395 } 396 397 static void XMLCALL 398 endDoctypeDecl(void *userData) { 399 XmlwfUserData *data = userData; 400 NotationList **notations; 401 size_t notationCount = 0; 402 NotationList *p; 403 size_t i; 404 405 /* How many notations do we have? */ 406 for (p = data->notationListHead; p != NULL; p = p->next) 407 notationCount++; 408 if (notationCount == 0) { 409 /* Nothing to report */ 410 goto cleanUp; 411 } 412 413 /* Detect and prevent integer overflow in the multiplication, mirroring 414 the guards in xcsdup() and resolveSystemId() */ 415 if (notationCount > SIZE_MAX / sizeof(NotationList *)) { 416 fprintf(stderr, "Unable to sort notations"); 417 goto cleanUp; 418 } 419 420 notations = malloc(notationCount * sizeof(NotationList *)); 421 if (notations == NULL) { 422 fprintf(stderr, "Unable to sort notations"); 423 goto cleanUp; 424 } 425 426 for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) { 427 notations[i] = p; 428 } 429 qsort(notations, notationCount, sizeof(NotationList *), notationCmp); 430 431 /* Output the DOCTYPE header */ 432 fputts(T("<!DOCTYPE "), data->fp); 433 fputts(data->currentDoctypeName, data->fp); 434 fputts(T(" [\n"), data->fp); 435 436 /* Now the NOTATIONs */ 437 for (i = 0; i < notationCount; i++) { 438 fputts(T("<!NOTATION "), data->fp); 439 fputts(notations[i]->notationName, data->fp); 440 if (notations[i]->publicId != NULL) { 441 fputts(T(" PUBLIC "), data->fp); 442 writeLiteral(data->fp, notations[i]->publicId); 443 if (notations[i]->systemId != NULL) { 444 puttc(T(' '), data->fp); 445 writeLiteral(data->fp, notations[i]->systemId); 446 } 447 } else if (notations[i]->systemId != NULL) { 448 fputts(T(" SYSTEM "), data->fp); 449 writeLiteral(data->fp, notations[i]->systemId); 450 } 451 puttc(T('>'), data->fp); 452 puttc(T('\n'), data->fp); 453 } 454 455 /* Finally end the DOCTYPE */ 456 fputts(T("]>\n"), data->fp); 457 458 free(notations); 459 460 cleanUp: 461 freeNotations(data); 462 free((void *)data->currentDoctypeName); 463 data->currentDoctypeName = NULL; 464 } 465 466 static void XMLCALL 467 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base, 468 const XML_Char *systemId, const XML_Char *publicId) { 469 XmlwfUserData *data = userData; 470 NotationList *entry = malloc(sizeof(NotationList)); 471 const char *errorMessage = "Unable to store NOTATION for output\n"; 472 473 UNUSED_P(base); 474 if (entry == NULL) { 475 fputs(errorMessage, stderr); 476 return; /* Nothing we can really do about this */ 477 } 478 entry->notationName = xcsdup(notationName); 479 if (entry->notationName == NULL) { 480 fputs(errorMessage, stderr); 481 free(entry); 482 return; 483 } 484 if (systemId != NULL) { 485 entry->systemId = xcsdup(systemId); 486 if (entry->systemId == NULL) { 487 fputs(errorMessage, stderr); 488 free((void *)entry->notationName); 489 free(entry); 490 return; 491 } 492 } else { 493 entry->systemId = NULL; 494 } 495 if (publicId != NULL) { 496 entry->publicId = xcsdup(publicId); 497 if (entry->publicId == NULL) { 498 fputs(errorMessage, stderr); 499 free((void *)entry->systemId); /* Safe if it's NULL */ 500 free((void *)entry->notationName); 501 free(entry); 502 return; 503 } 504 } else { 505 entry->publicId = NULL; 506 } 507 508 entry->next = data->notationListHead; 509 data->notationListHead = entry; 510 } 511 512 #endif /* not W3C14N */ 513 514 static void XMLCALL 515 defaultCharacterData(void *userData, const XML_Char *s, int len) { 516 UNUSED_P(s); 517 UNUSED_P(len); 518 XML_DefaultCurrent(userData); 519 } 520 521 static void XMLCALL 522 defaultStartElement(void *userData, const XML_Char *name, 523 const XML_Char **atts) { 524 UNUSED_P(name); 525 UNUSED_P(atts); 526 XML_DefaultCurrent(userData); 527 } 528 529 static void XMLCALL 530 defaultEndElement(void *userData, const XML_Char *name) { 531 UNUSED_P(name); 532 XML_DefaultCurrent(userData); 533 } 534 535 static void XMLCALL 536 defaultProcessingInstruction(void *userData, const XML_Char *target, 537 const XML_Char *data) { 538 UNUSED_P(target); 539 UNUSED_P(data); 540 XML_DefaultCurrent(userData); 541 } 542 543 static void XMLCALL 544 nopCharacterData(void *userData, const XML_Char *s, int len) { 545 UNUSED_P(userData); 546 UNUSED_P(s); 547 UNUSED_P(len); 548 } 549 550 static void XMLCALL 551 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) { 552 UNUSED_P(userData); 553 UNUSED_P(name); 554 UNUSED_P(atts); 555 } 556 557 static void XMLCALL 558 nopEndElement(void *userData, const XML_Char *name) { 559 UNUSED_P(userData); 560 UNUSED_P(name); 561 } 562 563 static void XMLCALL 564 nopProcessingInstruction(void *userData, const XML_Char *target, 565 const XML_Char *data) { 566 UNUSED_P(userData); 567 UNUSED_P(target); 568 UNUSED_P(data); 569 } 570 571 static void XMLCALL 572 markup(void *userData, const XML_Char *s, int len) { 573 FILE *fp = ((XmlwfUserData *)XML_GetUserData(userData))->fp; 574 for (; len > 0; --len, ++s) 575 puttc(*s, fp); 576 } 577 578 static void 579 metaLocation(XML_Parser parser) { 580 const XML_Char *uri = XML_GetBase(parser); 581 FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp; 582 if (uri) { 583 fputts(T(" uri=\""), fp); 584 characterData(XML_GetUserData(parser), uri, (int)tcslen(uri)); 585 puttc(T('"'), fp); 586 } 587 ftprintf(fp, 588 T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"") 589 T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%") 590 T(XML_FMT_INT_MOD) T("u\""), 591 XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser), 592 XML_GetCurrentLineNumber(parser), 593 XML_GetCurrentColumnNumber(parser)); 594 } 595 596 static void 597 metaStartDocument(void *userData) { 598 fputts(T("<document>\n"), ((XmlwfUserData *)XML_GetUserData(userData))->fp); 599 } 600 601 static void 602 metaEndDocument(void *userData) { 603 fputts(T("</document>\n"), ((XmlwfUserData *)XML_GetUserData(userData))->fp); 604 } 605 606 static void XMLCALL 607 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) { 608 XML_Parser parser = userData; 609 XmlwfUserData *data = XML_GetUserData(parser); 610 FILE *fp = data->fp; 611 const XML_Char **specifiedAttsEnd 612 = atts + XML_GetSpecifiedAttributeCount(parser); 613 const XML_Char **idAttPtr; 614 int idAttIndex = XML_GetIdAttributeIndex(parser); 615 if (idAttIndex < 0) 616 idAttPtr = 0; 617 else 618 idAttPtr = atts + idAttIndex; 619 620 fputts(T("<starttag name=\""), fp); 621 characterData(data, name, (int)tcslen(name)); 622 puttc(T('"'), fp); 623 metaLocation(parser); 624 if (*atts) { 625 fputts(T(">\n"), fp); 626 do { 627 fputts(T("<attribute name=\""), fp); 628 characterData(data, atts[0], (int)tcslen(atts[0])); 629 fputts(T("\" value=\""), fp); 630 characterData(data, atts[1], (int)tcslen(atts[1])); 631 if (atts >= specifiedAttsEnd) 632 fputts(T("\" defaulted=\"yes\"/>\n"), fp); 633 else if (atts == idAttPtr) 634 fputts(T("\" id=\"yes\"/>\n"), fp); 635 else 636 fputts(T("\"/>\n"), fp); 637 } while (*(atts += 2)); 638 fputts(T("</starttag>\n"), fp); 639 } else 640 fputts(T("/>\n"), fp); 641 } 642 643 static void XMLCALL 644 metaEndElement(void *userData, const XML_Char *name) { 645 XML_Parser parser = userData; 646 XmlwfUserData *data = XML_GetUserData(parser); 647 FILE *fp = data->fp; 648 fputts(T("<endtag name=\""), fp); 649 characterData(data, name, (int)tcslen(name)); 650 puttc(T('"'), fp); 651 metaLocation(parser); 652 fputts(T("/>\n"), fp); 653 } 654 655 static void XMLCALL 656 metaProcessingInstruction(void *userData, const XML_Char *target, 657 const XML_Char *data) { 658 XML_Parser parser = userData; 659 XmlwfUserData *usrData = XML_GetUserData(parser); 660 FILE *fp = usrData->fp; 661 ftprintf(fp, T("<pi target=\"%s\" data=\""), target); 662 characterData(usrData, data, (int)tcslen(data)); 663 puttc(T('"'), fp); 664 metaLocation(parser); 665 fputts(T("/>\n"), fp); 666 } 667 668 static void XMLCALL 669 metaComment(void *userData, const XML_Char *data) { 670 XML_Parser parser = userData; 671 XmlwfUserData *usrData = XML_GetUserData(parser); 672 FILE *fp = usrData->fp; 673 fputts(T("<comment data=\""), fp); 674 characterData(usrData, data, (int)tcslen(data)); 675 puttc(T('"'), fp); 676 metaLocation(parser); 677 fputts(T("/>\n"), fp); 678 } 679 680 static void XMLCALL 681 metaStartCdataSection(void *userData) { 682 XML_Parser parser = userData; 683 XmlwfUserData *data = XML_GetUserData(parser); 684 FILE *fp = data->fp; 685 fputts(T("<startcdata"), fp); 686 metaLocation(parser); 687 fputts(T("/>\n"), fp); 688 } 689 690 static void XMLCALL 691 metaEndCdataSection(void *userData) { 692 XML_Parser parser = userData; 693 XmlwfUserData *data = XML_GetUserData(parser); 694 FILE *fp = data->fp; 695 fputts(T("<endcdata"), fp); 696 metaLocation(parser); 697 fputts(T("/>\n"), fp); 698 } 699 700 static void XMLCALL 701 metaCharacterData(void *userData, const XML_Char *s, int len) { 702 XML_Parser parser = userData; 703 XmlwfUserData *data = XML_GetUserData(parser); 704 FILE *fp = data->fp; 705 fputts(T("<chars str=\""), fp); 706 characterData(data, s, len); 707 puttc(T('"'), fp); 708 metaLocation(parser); 709 fputts(T("/>\n"), fp); 710 } 711 712 static void XMLCALL 713 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName, 714 const XML_Char *sysid, const XML_Char *pubid, 715 int has_internal_subset) { 716 XML_Parser parser = userData; 717 XmlwfUserData *data = XML_GetUserData(parser); 718 FILE *fp = data->fp; 719 UNUSED_P(sysid); 720 UNUSED_P(pubid); 721 UNUSED_P(has_internal_subset); 722 ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName); 723 metaLocation(parser); 724 fputts(T("/>\n"), fp); 725 } 726 727 static void XMLCALL 728 metaEndDoctypeDecl(void *userData) { 729 XML_Parser parser = userData; 730 XmlwfUserData *data = XML_GetUserData(parser); 731 FILE *fp = data->fp; 732 fputts(T("<enddoctype"), fp); 733 metaLocation(parser); 734 fputts(T("/>\n"), fp); 735 } 736 737 static void XMLCALL 738 metaNotationDecl(void *userData, const XML_Char *notationName, 739 const XML_Char *base, const XML_Char *systemId, 740 const XML_Char *publicId) { 741 XML_Parser parser = userData; 742 XmlwfUserData *data = XML_GetUserData(parser); 743 FILE *fp = data->fp; 744 UNUSED_P(base); 745 ftprintf(fp, T("<notation name=\"%s\""), notationName); 746 if (publicId) 747 ftprintf(fp, T(" public=\"%s\""), publicId); 748 if (systemId) { 749 fputts(T(" system=\""), fp); 750 characterData(data, systemId, (int)tcslen(systemId)); 751 puttc(T('"'), fp); 752 } 753 metaLocation(parser); 754 fputts(T("/>\n"), fp); 755 } 756 757 static void XMLCALL 758 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param, 759 const XML_Char *value, int value_length, const XML_Char *base, 760 const XML_Char *systemId, const XML_Char *publicId, 761 const XML_Char *notationName) { 762 XML_Parser parser = userData; 763 XmlwfUserData *data = XML_GetUserData(parser); 764 FILE *fp = data->fp; 765 766 UNUSED_P(is_param); 767 UNUSED_P(base); 768 if (value) { 769 ftprintf(fp, T("<entity name=\"%s\""), entityName); 770 metaLocation(parser); 771 puttc(T('>'), fp); 772 characterData(data, value, value_length); 773 fputts(T("</entity/>\n"), fp); 774 } else if (notationName) { 775 ftprintf(fp, T("<entity name=\"%s\""), entityName); 776 if (publicId) 777 ftprintf(fp, T(" public=\"%s\""), publicId); 778 fputts(T(" system=\""), fp); 779 characterData(data, systemId, (int)tcslen(systemId)); 780 puttc(T('"'), fp); 781 ftprintf(fp, T(" notation=\"%s\""), notationName); 782 metaLocation(parser); 783 fputts(T("/>\n"), fp); 784 } else { 785 ftprintf(fp, T("<entity name=\"%s\""), entityName); 786 if (publicId) 787 ftprintf(fp, T(" public=\"%s\""), publicId); 788 fputts(T(" system=\""), fp); 789 characterData(data, systemId, (int)tcslen(systemId)); 790 puttc(T('"'), fp); 791 metaLocation(parser); 792 fputts(T("/>\n"), fp); 793 } 794 } 795 796 static void XMLCALL 797 metaStartNamespaceDecl(void *userData, const XML_Char *prefix, 798 const XML_Char *uri) { 799 XML_Parser parser = userData; 800 XmlwfUserData *data = XML_GetUserData(parser); 801 FILE *fp = data->fp; 802 fputts(T("<startns"), fp); 803 if (prefix) 804 ftprintf(fp, T(" prefix=\"%s\""), prefix); 805 if (uri) { 806 fputts(T(" ns=\""), fp); 807 characterData(data, uri, (int)tcslen(uri)); 808 fputts(T("\"/>\n"), fp); 809 } else 810 fputts(T("/>\n"), fp); 811 } 812 813 static void XMLCALL 814 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) { 815 XML_Parser parser = userData; 816 XmlwfUserData *data = XML_GetUserData(parser); 817 FILE *fp = data->fp; 818 if (! prefix) 819 fputts(T("<endns/>\n"), fp); 820 else 821 ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix); 822 } 823 824 static int XMLCALL 825 unknownEncodingConvert(void *data, const char *p) { 826 return codepageConvert(*(int *)data, p); 827 } 828 829 static int XMLCALL 830 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) { 831 int cp; 832 static const XML_Char prefixL[] = T("windows-"); 833 static const XML_Char prefixU[] = T("WINDOWS-"); 834 int i; 835 836 UNUSED_P(userData); 837 for (i = 0; prefixU[i]; i++) 838 if (name[i] != prefixU[i] && name[i] != prefixL[i]) 839 return 0; 840 841 cp = 0; 842 for (; name[i]; i++) { 843 static const XML_Char digits[] = T("0123456789"); 844 const XML_Char *s = tcschr(digits, name[i]); 845 if (! s) 846 return 0; 847 cp *= 10; 848 cp += (int)(s - digits); 849 if (cp >= 0x10000) 850 return 0; 851 } 852 if (! codepageMap(cp, info->map)) 853 return 0; 854 info->convert = unknownEncodingConvert; 855 /* We could just cast the code page integer to a void *, 856 and avoid the use of release. */ 857 info->release = free; 858 info->data = malloc(sizeof(int)); 859 if (! info->data) 860 return 0; 861 *(int *)info->data = cp; 862 return 1; 863 } 864 865 static int XMLCALL 866 notStandalone(void *userData) { 867 UNUSED_P(userData); 868 return 0; 869 } 870 871 static void 872 showVersion(XML_Char *prog) { 873 XML_Char *s = prog; 874 XML_Char ch; 875 const XML_Feature *features = XML_GetFeatureList(); 876 while ((ch = *s) != 0) { 877 if (ch == '/' 878 #if defined(_WIN32) 879 || ch == '\\' 880 #endif 881 ) 882 prog = s + 1; 883 ++s; 884 } 885 ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion()); 886 if (features != NULL && features[0].feature != XML_FEATURE_END) { 887 int i = 1; 888 ftprintf(stdout, T("%s"), features[0].name); 889 if (features[0].value) 890 ftprintf(stdout, T("=%ld"), features[0].value); 891 while (features[i].feature != XML_FEATURE_END) { 892 ftprintf(stdout, T(", %s"), features[i].name); 893 if (features[i].value) 894 ftprintf(stdout, T("=%ld"), features[i].value); 895 ++i; 896 } 897 ftprintf(stdout, T("\n")); 898 } 899 } 900 901 #if defined(__GNUC__) 902 __attribute__((noreturn)) 903 #endif 904 static void 905 usage(const XML_Char *prog, int rc) { 906 ftprintf( 907 stderr, 908 /* Generated with: 909 * $ xmlwf/xmlwf_helpgen.sh 910 * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of 911 * xmlwf/xmlwf_helpgen.sh in here. 912 */ 913 /* clang-format off */ 914 T("usage:\n") 915 T(" %s [OPTIONS] [FILE ...]\n") 916 T(" %s -h|--help\n") 917 T(" %s -v|--version\n") 918 T("\n") 919 T("xmlwf - Determines if an XML document is well-formed\n") 920 T("\n") 921 T("positional arguments:\n") 922 T(" FILE file to process (default: STDIN)\n") 923 T("\n") 924 T("input control arguments:\n") 925 T(" -s print an error if the document is not [s]tandalone\n") 926 T(" -n enable [n]amespace processing\n") 927 T(" -p enable processing of external DTDs and [p]arameter entities\n") 928 T(" -x enable processing of e[x]ternal entities\n") 929 T(" (CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).)\n") 930 T(" -e ENCODING override any in-document [e]ncoding declaration\n") 931 T(" -w enable support for [W]indows code pages\n") 932 T(" -r disable memory-mapping and use [r]ead calls instead\n") 933 T(" -g BYTES buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n") 934 T(" -k when processing multiple files, [k]eep processing after first file with error\n") 935 T("\n") 936 T("output control arguments:\n") 937 T(" -d DIRECTORY output [d]estination directory\n") 938 T(" -c write a [c]opy of input XML, not canonical XML\n") 939 T(" -m write [m]eta XML, not canonical XML\n") 940 T(" -t write no XML output for [t]iming of plain parsing\n") 941 T(" -N enable adding doctype and [n]otation declarations\n") 942 T("\n") 943 T("amplification attack protection (e.g. billion laughs):\n") 944 T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n") 945 T("\n") 946 T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n") 947 T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n") 948 T("\n") 949 T("reparse deferral:\n") 950 T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n") 951 T("\n") 952 T("info arguments:\n") 953 T(" -h, --help show this [h]elp message and exit\n") 954 T(" -v, --version show program's [v]ersion number and exit\n") 955 T("\n") 956 T("environment variables:\n") 957 T(" EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)\n") 958 T(" Control verbosity of accounting debugging (default: 0)\n") 959 T(" EXPAT_ENTITY_DEBUG=(0|1|2)\n") 960 T(" Control verbosity of entity debugging (default: 0)\n") 961 T(" EXPAT_ENTROPY_DEBUG=(0|1)\n") 962 T(" Control verbosity of entropy debugging (default: 0)\n") 963 T(" EXPAT_MALLOC_DEBUG=(0|1|2)\n") 964 T(" Control verbosity of allocation tracker (default: 0)\n") 965 T("\n") 966 T("exit status:\n") 967 T(" 0 the input files are well-formed and the output (if requested) was written successfully\n") 968 T(" 1 could not allocate data structures, signals a serious problem with execution environment\n") 969 T(" 2 one or more input files were not well-formed\n") 970 T(" 3 could not create an output file\n") 971 T(" 4 command-line argument error\n") 972 T("\n") 973 T("xmlwf of libexpat is software libre, licensed under the MIT license.\n") 974 T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n") 975 , /* clang-format on */ 976 prog, prog, prog); 977 exit(rc); 978 } 979 980 #if defined(__MINGW32__) && defined(XML_UNICODE) 981 /* Silence warning about missing prototype */ 982 int wmain(int argc, XML_Char **argv); 983 #endif 984 985 #define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j) \ 986 { \ 987 if (argv[i][j + 1] == T('\0')) { \ 988 if (++i == argc) { \ 989 usage(argv[0], XMLWF_EXIT_USAGE_ERROR); \ 990 /* usage called exit(..), never gets here */ \ 991 } \ 992 constCharStarTarget = argv[i]; \ 993 } else { \ 994 constCharStarTarget = argv[i] + j + 1; \ 995 } \ 996 i++; \ 997 j = 0; \ 998 } 999 1000 int 1001 tmain(int argc, XML_Char **argv) { 1002 int i, j; 1003 const XML_Char *outputDir = NULL; 1004 const XML_Char *encoding = NULL; 1005 unsigned processFlags = XML_MAP_FILE; 1006 int windowsCodePages = 0; 1007 int outputType = 0; 1008 int useNamespaces = 0; 1009 int requireStandalone = 0; 1010 int requiresNotations = 0; 1011 int continueOnError = 0; 1012 1013 float attackMaximumAmplification = -1.0f; /* signaling "not set" */ 1014 unsigned long long attackThresholdBytes = 0; 1015 XML_Bool attackThresholdGiven = XML_FALSE; 1016 1017 XML_Bool disableDeferral = XML_FALSE; 1018 1019 int exitCode = XMLWF_EXIT_SUCCESS; 1020 enum XML_ParamEntityParsing paramEntityParsing 1021 = XML_PARAM_ENTITY_PARSING_NEVER; 1022 int useStdin = 0; 1023 XmlwfUserData userData = {NULL, NULL, NULL}; 1024 1025 #ifdef _MSC_VER 1026 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); 1027 #endif 1028 1029 i = 1; 1030 j = 0; 1031 while (i < argc) { 1032 if (j == 0) { 1033 if (argv[i][0] != T('-')) 1034 break; 1035 if (argv[i][1] == T('-')) { 1036 if (argv[i][2] == T('\0')) { 1037 i++; 1038 break; 1039 } else if (tcscmp(argv[i] + 2, T("help")) == 0) { 1040 usage(argv[0], XMLWF_EXIT_SUCCESS); 1041 // usage called exit(..), never gets here 1042 } else if (tcscmp(argv[i] + 2, T("version")) == 0) { 1043 showVersion(argv[0]); 1044 return XMLWF_EXIT_SUCCESS; 1045 } 1046 } 1047 j++; 1048 } 1049 switch (argv[i][j]) { 1050 case T('r'): 1051 processFlags &= ~XML_MAP_FILE; 1052 j++; 1053 break; 1054 case T('s'): 1055 requireStandalone = 1; 1056 j++; 1057 break; 1058 case T('n'): 1059 useNamespaces = 1; 1060 j++; 1061 break; 1062 case T('p'): 1063 paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS; 1064 EXPAT_FALLTHROUGH; 1065 case T('x'): 1066 processFlags |= XML_EXTERNAL_ENTITIES; 1067 j++; 1068 break; 1069 case T('w'): 1070 windowsCodePages = 1; 1071 j++; 1072 break; 1073 case T('m'): 1074 outputType = 'm'; 1075 j++; 1076 break; 1077 case T('c'): 1078 outputType = 'c'; 1079 useNamespaces = 0; 1080 j++; 1081 break; 1082 case T('t'): 1083 outputType = 't'; 1084 j++; 1085 break; 1086 case T('N'): 1087 requiresNotations = 1; 1088 j++; 1089 break; 1090 case T('d'): 1091 XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j); 1092 break; 1093 case T('e'): 1094 XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j); 1095 break; 1096 case T('h'): 1097 usage(argv[0], XMLWF_EXIT_SUCCESS); 1098 // usage called exit(..), never gets here 1099 case T('v'): 1100 showVersion(argv[0]); 1101 return XMLWF_EXIT_SUCCESS; 1102 case T('g'): { 1103 const XML_Char *valueText = NULL; 1104 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); 1105 1106 errno = 0; 1107 XML_Char *afterValueText = (XML_Char *)valueText; 1108 const long long read_size_bytes_candidate 1109 = tcstoull(valueText, &afterValueText, 10); 1110 if ((errno != 0) || (afterValueText[0] != T('\0')) 1111 || (read_size_bytes_candidate < 1) 1112 || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) { 1113 // This prevents tperror(..) from reporting misleading "[..]: Success" 1114 errno = ERANGE; 1115 tperror(T("invalid buffer size") T( 1116 " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)")); 1117 exit(XMLWF_EXIT_USAGE_ERROR); 1118 } 1119 g_read_size_bytes = (int)read_size_bytes_candidate; 1120 break; 1121 } 1122 case T('k'): 1123 continueOnError = 1; 1124 j++; 1125 break; 1126 case T('a'): { 1127 const XML_Char *valueText = NULL; 1128 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); 1129 1130 errno = 0; 1131 XML_Char *afterValueText = NULL; 1132 attackMaximumAmplification = tcstof(valueText, &afterValueText); 1133 if ((errno != 0) || (afterValueText[0] != T('\0')) 1134 || isnan(attackMaximumAmplification) 1135 || (attackMaximumAmplification < 1.0f)) { 1136 // This prevents tperror(..) from reporting misleading "[..]: Success" 1137 errno = ERANGE; 1138 tperror(T("invalid amplification limit") T( 1139 " (needs a floating point number greater or equal than 1.0)")); 1140 exit(XMLWF_EXIT_USAGE_ERROR); 1141 } 1142 #if XML_GE == 0 1143 ftprintf(stderr, 1144 T("Warning: Given amplification limit ignored") 1145 T(", xmlwf has been compiled without DTD/GE support.\n")); 1146 #endif 1147 break; 1148 } 1149 case T('b'): { 1150 const XML_Char *valueText = NULL; 1151 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); 1152 1153 errno = 0; 1154 XML_Char *afterValueText = (XML_Char *)valueText; 1155 attackThresholdBytes = tcstoull(valueText, &afterValueText, 10); 1156 if ((errno != 0) || (afterValueText[0] != T('\0'))) { 1157 // This prevents tperror(..) from reporting misleading "[..]: Success" 1158 errno = ERANGE; 1159 tperror(T("invalid ignore threshold") 1160 T(" (needs an integer from 0 to 2^64-1)")); 1161 exit(XMLWF_EXIT_USAGE_ERROR); 1162 } 1163 attackThresholdGiven = XML_TRUE; 1164 #if XML_GE == 0 1165 ftprintf(stderr, 1166 T("Warning: Given attack threshold ignored") 1167 T(", xmlwf has been compiled without DTD/GE support.\n")); 1168 #endif 1169 break; 1170 } 1171 case T('q'): { 1172 disableDeferral = XML_TRUE; 1173 j++; 1174 break; 1175 } 1176 case T('\0'): 1177 if (j > 1) { 1178 i++; 1179 j = 0; 1180 break; 1181 } 1182 EXPAT_FALLTHROUGH; 1183 default: 1184 usage(argv[0], XMLWF_EXIT_USAGE_ERROR); 1185 // usage called exit(..), never gets here 1186 } 1187 } 1188 if (i == argc) { 1189 useStdin = 1; 1190 processFlags &= ~XML_MAP_FILE; 1191 i--; 1192 } 1193 for (; i < argc; i++) { 1194 XML_Char *outName = 0; 1195 int result; 1196 XML_Parser parser; 1197 if (useNamespaces) 1198 parser = XML_ParserCreateNS(encoding, NSSEP); 1199 else 1200 parser = XML_ParserCreate(encoding); 1201 1202 if (! parser) { 1203 tperror(T("Could not instantiate parser")); 1204 exit(XMLWF_EXIT_INTERNAL_ERROR); 1205 } 1206 1207 if (attackMaximumAmplification != -1.0f) { 1208 #if XML_GE == 1 1209 XML_SetBillionLaughsAttackProtectionMaximumAmplification( 1210 parser, attackMaximumAmplification); 1211 XML_SetAllocTrackerMaximumAmplification(parser, 1212 attackMaximumAmplification); 1213 #endif 1214 } 1215 if (attackThresholdGiven) { 1216 #if XML_GE == 1 1217 XML_SetBillionLaughsAttackProtectionActivationThreshold( 1218 parser, attackThresholdBytes); 1219 XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes); 1220 #else 1221 (void)attackThresholdBytes; // silence -Wunused-but-set-variable 1222 #endif 1223 } 1224 1225 if (disableDeferral) { 1226 const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE); 1227 if (! success) { 1228 // This prevents tperror(..) from reporting misleading "[..]: Success" 1229 errno = EINVAL; 1230 tperror(T("Failed to disable reparse deferral")); 1231 exit(XMLWF_EXIT_INTERNAL_ERROR); 1232 } 1233 } 1234 1235 if (requireStandalone) 1236 XML_SetNotStandaloneHandler(parser, notStandalone); 1237 XML_SetParamEntityParsing(parser, paramEntityParsing); 1238 if (outputType == 't') { 1239 /* This is for doing timings; this gives a more realistic estimate of 1240 the parsing time. */ 1241 outputDir = 0; 1242 XML_SetElementHandler(parser, nopStartElement, nopEndElement); 1243 XML_SetCharacterDataHandler(parser, nopCharacterData); 1244 XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction); 1245 } else if (outputDir) { 1246 const XML_Char *delim = T("/"); 1247 const XML_Char *file = useStdin ? T("STDIN") : argv[i]; 1248 if (! useStdin) { 1249 /* Jump after last (back)slash */ 1250 const XML_Char *lastDelim = tcsrchr(file, delim[0]); 1251 if (lastDelim) 1252 file = lastDelim + 1; 1253 #if defined(_WIN32) 1254 else { 1255 const XML_Char *winDelim = T("\\"); 1256 lastDelim = tcsrchr(file, winDelim[0]); 1257 if (lastDelim) { 1258 file = lastDelim + 1; 1259 delim = winDelim; 1260 } 1261 } 1262 #endif 1263 } 1264 const size_t outputDirLen = tcslen(outputDir); 1265 const size_t fileLen = tcslen(file); 1266 1267 /* Detect and prevent integer overflow in the addition (without 1268 risking underflow) and the multiplication, mirroring the guards 1269 in xcsdup() and resolveSystemId() */ 1270 if (outputDirLen > SIZE_MAX - fileLen 1271 || outputDirLen > SIZE_MAX - fileLen - 2) { 1272 tperror(T("Could not allocate memory")); 1273 exit(XMLWF_EXIT_INTERNAL_ERROR); 1274 } 1275 1276 const size_t charsRequired = outputDirLen + fileLen + 2; 1277 1278 if (charsRequired > SIZE_MAX / sizeof(XML_Char)) { 1279 tperror(T("Could not allocate memory")); 1280 exit(XMLWF_EXIT_INTERNAL_ERROR); 1281 } 1282 1283 outName = malloc(charsRequired * sizeof(XML_Char)); 1284 if (! outName) { 1285 tperror(T("Could not allocate memory")); 1286 exit(XMLWF_EXIT_INTERNAL_ERROR); 1287 } 1288 tcscpy(outName, outputDir); 1289 tcscat(outName, delim); 1290 tcscat(outName, file); 1291 userData.fp = tfopen(outName, T("wb")); 1292 if (! userData.fp) { 1293 tperror(outName); 1294 exitCode = XMLWF_EXIT_OUTPUT_ERROR; 1295 free(outName); 1296 XML_ParserFree(parser); 1297 if (continueOnError) { 1298 continue; 1299 } else { 1300 break; 1301 } 1302 } 1303 setvbuf(userData.fp, NULL, _IOFBF, 16384); 1304 #ifdef XML_UNICODE 1305 puttc(0xFEFF, userData.fp); 1306 #endif 1307 XML_SetUserData(parser, &userData); 1308 switch (outputType) { 1309 case 'm': 1310 XML_UseParserAsHandlerArg(parser); 1311 XML_SetElementHandler(parser, metaStartElement, metaEndElement); 1312 XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction); 1313 XML_SetCommentHandler(parser, metaComment); 1314 XML_SetCdataSectionHandler(parser, metaStartCdataSection, 1315 metaEndCdataSection); 1316 XML_SetCharacterDataHandler(parser, metaCharacterData); 1317 XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl, 1318 metaEndDoctypeDecl); 1319 XML_SetEntityDeclHandler(parser, metaEntityDecl); 1320 XML_SetNotationDeclHandler(parser, metaNotationDecl); 1321 XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl, 1322 metaEndNamespaceDecl); 1323 metaStartDocument(parser); 1324 break; 1325 case 'c': 1326 XML_UseParserAsHandlerArg(parser); 1327 XML_SetDefaultHandler(parser, markup); 1328 XML_SetElementHandler(parser, defaultStartElement, defaultEndElement); 1329 XML_SetCharacterDataHandler(parser, defaultCharacterData); 1330 XML_SetProcessingInstructionHandler(parser, 1331 defaultProcessingInstruction); 1332 break; 1333 default: 1334 if (useNamespaces) 1335 XML_SetElementHandler(parser, startElementNS, endElementNS); 1336 else 1337 XML_SetElementHandler(parser, startElement, endElement); 1338 XML_SetCharacterDataHandler(parser, characterData); 1339 #ifndef W3C14N 1340 XML_SetProcessingInstructionHandler(parser, processingInstruction); 1341 if (requiresNotations) { 1342 XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl); 1343 XML_SetNotationDeclHandler(parser, notationDecl); 1344 } 1345 #endif /* not W3C14N */ 1346 break; 1347 } 1348 } 1349 if (windowsCodePages) 1350 XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0); 1351 result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags); 1352 if (outputDir) { 1353 if (outputType == 'm') 1354 metaEndDocument(parser); 1355 fclose(userData.fp); 1356 if (! result) { 1357 tremove(outName); 1358 } 1359 free(outName); 1360 } 1361 XML_ParserFree(parser); 1362 if (! result) { 1363 exitCode = XMLWF_EXIT_NOT_WELLFORMED; 1364 cleanupUserData(&userData); 1365 if (! continueOnError) { 1366 break; 1367 } 1368 } 1369 } 1370 return exitCode; 1371 } 1372