1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000-2017 Expat development team 11 Licensed under the MIT license: 12 13 Permission is hereby granted, free of charge, to any person obtaining 14 a copy of this software and associated documentation files (the 15 "Software"), to deal in the Software without restriction, including 16 without limitation the rights to use, copy, modify, merge, publish, 17 distribute, sublicense, and/or sell copies of the Software, and to permit 18 persons to whom the Software is furnished to do so, subject to the 19 following conditions: 20 21 The above copyright notice and this permission notice shall be included 22 in all copies or substantial portions of the Software. 23 24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 30 USE OR OTHER DEALINGS IN THE SOFTWARE. 31 */ 32 33 #include <assert.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <stddef.h> 37 #include <string.h> 38 39 #include "expat.h" 40 #include "codepage.h" 41 #include "internal.h" /* for UNUSED_P only */ 42 #include "xmlfile.h" 43 #include "xmltchar.h" 44 45 #ifdef _MSC_VER 46 # include <crtdbg.h> 47 #endif 48 49 #ifdef XML_UNICODE 50 # include <wchar.h> 51 #endif 52 53 /* Structures for handler user data */ 54 typedef struct NotationList { 55 struct NotationList *next; 56 const XML_Char *notationName; 57 const XML_Char *systemId; 58 const XML_Char *publicId; 59 } NotationList; 60 61 typedef struct xmlwfUserData { 62 FILE *fp; 63 NotationList *notationListHead; 64 const XML_Char *currentDoctypeName; 65 } XmlwfUserData; 66 67 /* This ensures proper sorting. */ 68 69 #define NSSEP T('\001') 70 71 static void XMLCALL 72 characterData(void *userData, const XML_Char *s, int len) { 73 FILE *fp = ((XmlwfUserData *)userData)->fp; 74 for (; len > 0; --len, ++s) { 75 switch (*s) { 76 case T('&'): 77 fputts(T("&"), fp); 78 break; 79 case T('<'): 80 fputts(T("<"), fp); 81 break; 82 case T('>'): 83 fputts(T(">"), fp); 84 break; 85 #ifdef W3C14N 86 case 13: 87 fputts(T("
"), fp); 88 break; 89 #else 90 case T('"'): 91 fputts(T("""), fp); 92 break; 93 case 9: 94 case 10: 95 case 13: 96 ftprintf(fp, T("&#%d;"), *s); 97 break; 98 #endif 99 default: 100 puttc(*s, fp); 101 break; 102 } 103 } 104 } 105 106 static void 107 attributeValue(FILE *fp, const XML_Char *s) { 108 puttc(T('='), fp); 109 puttc(T('"'), fp); 110 assert(s); 111 for (;;) { 112 switch (*s) { 113 case 0: 114 case NSSEP: 115 puttc(T('"'), fp); 116 return; 117 case T('&'): 118 fputts(T("&"), fp); 119 break; 120 case T('<'): 121 fputts(T("<"), fp); 122 break; 123 case T('"'): 124 fputts(T("""), fp); 125 break; 126 #ifdef W3C14N 127 case 9: 128 fputts(T("	"), fp); 129 break; 130 case 10: 131 fputts(T("
"), fp); 132 break; 133 case 13: 134 fputts(T("
"), fp); 135 break; 136 #else 137 case T('>'): 138 fputts(T(">"), fp); 139 break; 140 case 9: 141 case 10: 142 case 13: 143 ftprintf(fp, T("&#%d;"), *s); 144 break; 145 #endif 146 default: 147 puttc(*s, fp); 148 break; 149 } 150 s++; 151 } 152 } 153 154 /* Lexicographically comparing UTF-8 encoded attribute values, 155 is equivalent to lexicographically comparing based on the character number. */ 156 157 static int 158 attcmp(const void *att1, const void *att2) { 159 return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2); 160 } 161 162 static void XMLCALL 163 startElement(void *userData, const XML_Char *name, const XML_Char **atts) { 164 int nAtts; 165 const XML_Char **p; 166 FILE *fp = ((XmlwfUserData *)userData)->fp; 167 puttc(T('<'), fp); 168 fputts(name, fp); 169 170 p = atts; 171 while (*p) 172 ++p; 173 nAtts = (int)((p - atts) >> 1); 174 if (nAtts > 1) 175 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp); 176 while (*atts) { 177 puttc(T(' '), fp); 178 fputts(*atts++, fp); 179 attributeValue(fp, *atts); 180 atts++; 181 } 182 puttc(T('>'), fp); 183 } 184 185 static void XMLCALL 186 endElement(void *userData, const XML_Char *name) { 187 FILE *fp = ((XmlwfUserData *)userData)->fp; 188 puttc(T('<'), fp); 189 puttc(T('/'), fp); 190 fputts(name, fp); 191 puttc(T('>'), fp); 192 } 193 194 static int 195 nsattcmp(const void *p1, const void *p2) { 196 const XML_Char *att1 = *(const XML_Char **)p1; 197 const XML_Char *att2 = *(const XML_Char **)p2; 198 int sep1 = (tcsrchr(att1, NSSEP) != 0); 199 int sep2 = (tcsrchr(att1, NSSEP) != 0); 200 if (sep1 != sep2) 201 return sep1 - sep2; 202 return tcscmp(att1, att2); 203 } 204 205 static void XMLCALL 206 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) { 207 int nAtts; 208 int nsi; 209 const XML_Char **p; 210 FILE *fp = ((XmlwfUserData *)userData)->fp; 211 const XML_Char *sep; 212 puttc(T('<'), fp); 213 214 sep = tcsrchr(name, NSSEP); 215 if (sep) { 216 fputts(T("n1:"), fp); 217 fputts(sep + 1, fp); 218 fputts(T(" xmlns:n1"), fp); 219 attributeValue(fp, name); 220 nsi = 2; 221 } else { 222 fputts(name, fp); 223 nsi = 1; 224 } 225 226 p = atts; 227 while (*p) 228 ++p; 229 nAtts = (int)((p - atts) >> 1); 230 if (nAtts > 1) 231 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp); 232 while (*atts) { 233 name = *atts++; 234 sep = tcsrchr(name, NSSEP); 235 puttc(T(' '), fp); 236 if (sep) { 237 ftprintf(fp, T("n%d:"), nsi); 238 fputts(sep + 1, fp); 239 } else 240 fputts(name, fp); 241 attributeValue(fp, *atts); 242 if (sep) { 243 ftprintf(fp, T(" xmlns:n%d"), nsi++); 244 attributeValue(fp, name); 245 } 246 atts++; 247 } 248 puttc(T('>'), fp); 249 } 250 251 static void XMLCALL 252 endElementNS(void *userData, const XML_Char *name) { 253 FILE *fp = ((XmlwfUserData *)userData)->fp; 254 const XML_Char *sep; 255 puttc(T('<'), fp); 256 puttc(T('/'), fp); 257 sep = tcsrchr(name, NSSEP); 258 if (sep) { 259 fputts(T("n1:"), fp); 260 fputts(sep + 1, fp); 261 } else 262 fputts(name, fp); 263 puttc(T('>'), fp); 264 } 265 266 #ifndef W3C14N 267 268 static void XMLCALL 269 processingInstruction(void *userData, const XML_Char *target, 270 const XML_Char *data) { 271 FILE *fp = ((XmlwfUserData *)userData)->fp; 272 puttc(T('<'), fp); 273 puttc(T('?'), fp); 274 fputts(target, fp); 275 puttc(T(' '), fp); 276 fputts(data, fp); 277 puttc(T('?'), fp); 278 puttc(T('>'), fp); 279 } 280 281 static XML_Char * 282 xcsdup(const XML_Char *s) { 283 XML_Char *result; 284 int count = 0; 285 int numBytes; 286 287 /* Get the length of the string, including terminator */ 288 while (s[count++] != 0) { 289 /* Do nothing */ 290 } 291 numBytes = count * sizeof(XML_Char); 292 result = malloc(numBytes); 293 if (result == NULL) 294 return NULL; 295 memcpy(result, s, numBytes); 296 return result; 297 } 298 299 static void XMLCALL 300 startDoctypeDecl(void *userData, const XML_Char *doctypeName, 301 const XML_Char *sysid, const XML_Char *publid, 302 int has_internal_subset) { 303 XmlwfUserData *data = (XmlwfUserData *)userData; 304 UNUSED_P(sysid); 305 UNUSED_P(publid); 306 UNUSED_P(has_internal_subset); 307 data->currentDoctypeName = xcsdup(doctypeName); 308 } 309 310 static void 311 freeNotations(XmlwfUserData *data) { 312 NotationList *notationListHead = data->notationListHead; 313 314 while (notationListHead != NULL) { 315 NotationList *next = notationListHead->next; 316 free((void *)notationListHead->notationName); 317 free((void *)notationListHead->systemId); 318 free((void *)notationListHead->publicId); 319 free(notationListHead); 320 notationListHead = next; 321 } 322 data->notationListHead = NULL; 323 } 324 325 static int 326 xcscmp(const XML_Char *xs, const XML_Char *xt) { 327 while (*xs != 0 && *xt != 0) { 328 if (*xs < *xt) 329 return -1; 330 if (*xs > *xt) 331 return 1; 332 xs++; 333 xt++; 334 } 335 if (*xs < *xt) 336 return -1; 337 if (*xs > *xt) 338 return 1; 339 return 0; 340 } 341 342 static int 343 notationCmp(const void *a, const void *b) { 344 const NotationList *const n1 = *(NotationList **)a; 345 const NotationList *const n2 = *(NotationList **)b; 346 347 return xcscmp(n1->notationName, n2->notationName); 348 } 349 350 static void XMLCALL 351 endDoctypeDecl(void *userData) { 352 XmlwfUserData *data = (XmlwfUserData *)userData; 353 NotationList **notations; 354 int notationCount = 0; 355 NotationList *p; 356 int i; 357 358 /* How many notations do we have? */ 359 for (p = data->notationListHead; p != NULL; p = p->next) 360 notationCount++; 361 if (notationCount == 0) { 362 /* Nothing to report */ 363 free((void *)data->currentDoctypeName); 364 data->currentDoctypeName = NULL; 365 return; 366 } 367 368 notations = malloc(notationCount * sizeof(NotationList *)); 369 if (notations == NULL) { 370 fprintf(stderr, "Unable to sort notations"); 371 freeNotations(data); 372 return; 373 } 374 375 for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) { 376 notations[i] = p; 377 } 378 qsort(notations, notationCount, sizeof(NotationList *), notationCmp); 379 380 /* Output the DOCTYPE header */ 381 fputts(T("<!DOCTYPE "), data->fp); 382 fputts(data->currentDoctypeName, data->fp); 383 fputts(T(" [\n"), data->fp); 384 385 /* Now the NOTATIONs */ 386 for (i = 0; i < notationCount; i++) { 387 fputts(T("<!NOTATION "), data->fp); 388 fputts(notations[i]->notationName, data->fp); 389 if (notations[i]->publicId != NULL) { 390 fputts(T(" PUBLIC '"), data->fp); 391 fputts(notations[i]->publicId, data->fp); 392 puttc(T('\''), data->fp); 393 if (notations[i]->systemId != NULL) { 394 puttc(T(' '), data->fp); 395 puttc(T('\''), data->fp); 396 fputts(notations[i]->systemId, data->fp); 397 puttc(T('\''), data->fp); 398 } 399 } else if (notations[i]->systemId != NULL) { 400 fputts(T(" SYSTEM '"), data->fp); 401 fputts(notations[i]->systemId, data->fp); 402 puttc(T('\''), data->fp); 403 } 404 puttc(T('>'), data->fp); 405 puttc(T('\n'), data->fp); 406 } 407 408 /* Finally end the DOCTYPE */ 409 fputts(T("]>\n"), data->fp); 410 411 free(notations); 412 freeNotations(data); 413 free((void *)data->currentDoctypeName); 414 data->currentDoctypeName = NULL; 415 } 416 417 static void XMLCALL 418 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base, 419 const XML_Char *systemId, const XML_Char *publicId) { 420 XmlwfUserData *data = (XmlwfUserData *)userData; 421 NotationList *entry = malloc(sizeof(NotationList)); 422 const char *errorMessage = "Unable to store NOTATION for output\n"; 423 424 UNUSED_P(base); 425 if (entry == NULL) { 426 fputs(errorMessage, stderr); 427 return; /* Nothing we can really do about this */ 428 } 429 entry->notationName = xcsdup(notationName); 430 if (entry->notationName == NULL) { 431 fputs(errorMessage, stderr); 432 free(entry); 433 return; 434 } 435 if (systemId != NULL) { 436 entry->systemId = xcsdup(systemId); 437 if (entry->systemId == NULL) { 438 fputs(errorMessage, stderr); 439 free((void *)entry->notationName); 440 free(entry); 441 return; 442 } 443 } else { 444 entry->systemId = NULL; 445 } 446 if (publicId != NULL) { 447 entry->publicId = xcsdup(publicId); 448 if (entry->publicId == NULL) { 449 fputs(errorMessage, stderr); 450 free((void *)entry->systemId); /* Safe if it's NULL */ 451 free((void *)entry->notationName); 452 free(entry); 453 return; 454 } 455 } else { 456 entry->publicId = NULL; 457 } 458 459 entry->next = data->notationListHead; 460 data->notationListHead = entry; 461 } 462 463 #endif /* not W3C14N */ 464 465 static void XMLCALL 466 defaultCharacterData(void *userData, const XML_Char *s, int len) { 467 UNUSED_P(s); 468 UNUSED_P(len); 469 XML_DefaultCurrent((XML_Parser)userData); 470 } 471 472 static void XMLCALL 473 defaultStartElement(void *userData, const XML_Char *name, 474 const XML_Char **atts) { 475 UNUSED_P(name); 476 UNUSED_P(atts); 477 XML_DefaultCurrent((XML_Parser)userData); 478 } 479 480 static void XMLCALL 481 defaultEndElement(void *userData, const XML_Char *name) { 482 UNUSED_P(name); 483 XML_DefaultCurrent((XML_Parser)userData); 484 } 485 486 static void XMLCALL 487 defaultProcessingInstruction(void *userData, const XML_Char *target, 488 const XML_Char *data) { 489 UNUSED_P(target); 490 UNUSED_P(data); 491 XML_DefaultCurrent((XML_Parser)userData); 492 } 493 494 static void XMLCALL 495 nopCharacterData(void *userData, const XML_Char *s, int len) { 496 UNUSED_P(userData); 497 UNUSED_P(s); 498 UNUSED_P(len); 499 } 500 501 static void XMLCALL 502 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) { 503 UNUSED_P(userData); 504 UNUSED_P(name); 505 UNUSED_P(atts); 506 } 507 508 static void XMLCALL 509 nopEndElement(void *userData, const XML_Char *name) { 510 UNUSED_P(userData); 511 UNUSED_P(name); 512 } 513 514 static void XMLCALL 515 nopProcessingInstruction(void *userData, const XML_Char *target, 516 const XML_Char *data) { 517 UNUSED_P(userData); 518 UNUSED_P(target); 519 UNUSED_P(data); 520 } 521 522 static void XMLCALL 523 markup(void *userData, const XML_Char *s, int len) { 524 FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp; 525 for (; len > 0; --len, ++s) 526 puttc(*s, fp); 527 } 528 529 static void 530 metaLocation(XML_Parser parser) { 531 const XML_Char *uri = XML_GetBase(parser); 532 FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp; 533 if (uri) 534 ftprintf(fp, T(" uri=\"%s\""), uri); 535 ftprintf(fp, 536 T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"") 537 T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%") 538 T(XML_FMT_INT_MOD) T("u\""), 539 XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser), 540 XML_GetCurrentLineNumber(parser), 541 XML_GetCurrentColumnNumber(parser)); 542 } 543 544 static void 545 metaStartDocument(void *userData) { 546 fputts(T("<document>\n"), 547 ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp); 548 } 549 550 static void 551 metaEndDocument(void *userData) { 552 fputts(T("</document>\n"), 553 ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp); 554 } 555 556 static void XMLCALL 557 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) { 558 XML_Parser parser = (XML_Parser)userData; 559 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 560 FILE *fp = data->fp; 561 const XML_Char **specifiedAttsEnd 562 = atts + XML_GetSpecifiedAttributeCount(parser); 563 const XML_Char **idAttPtr; 564 int idAttIndex = XML_GetIdAttributeIndex(parser); 565 if (idAttIndex < 0) 566 idAttPtr = 0; 567 else 568 idAttPtr = atts + idAttIndex; 569 570 ftprintf(fp, T("<starttag name=\"%s\""), name); 571 metaLocation(parser); 572 if (*atts) { 573 fputts(T(">\n"), fp); 574 do { 575 ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]); 576 characterData(data, atts[1], (int)tcslen(atts[1])); 577 if (atts >= specifiedAttsEnd) 578 fputts(T("\" defaulted=\"yes\"/>\n"), fp); 579 else if (atts == idAttPtr) 580 fputts(T("\" id=\"yes\"/>\n"), fp); 581 else 582 fputts(T("\"/>\n"), fp); 583 } while (*(atts += 2)); 584 fputts(T("</starttag>\n"), fp); 585 } else 586 fputts(T("/>\n"), fp); 587 } 588 589 static void XMLCALL 590 metaEndElement(void *userData, const XML_Char *name) { 591 XML_Parser parser = (XML_Parser)userData; 592 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 593 FILE *fp = data->fp; 594 ftprintf(fp, T("<endtag name=\"%s\""), name); 595 metaLocation(parser); 596 fputts(T("/>\n"), fp); 597 } 598 599 static void XMLCALL 600 metaProcessingInstruction(void *userData, const XML_Char *target, 601 const XML_Char *data) { 602 XML_Parser parser = (XML_Parser)userData; 603 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser); 604 FILE *fp = usrData->fp; 605 ftprintf(fp, T("<pi target=\"%s\" data=\""), target); 606 characterData(usrData, data, (int)tcslen(data)); 607 puttc(T('"'), fp); 608 metaLocation(parser); 609 fputts(T("/>\n"), fp); 610 } 611 612 static void XMLCALL 613 metaComment(void *userData, const XML_Char *data) { 614 XML_Parser parser = (XML_Parser)userData; 615 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser); 616 FILE *fp = usrData->fp; 617 fputts(T("<comment data=\""), fp); 618 characterData(usrData, data, (int)tcslen(data)); 619 puttc(T('"'), fp); 620 metaLocation(parser); 621 fputts(T("/>\n"), fp); 622 } 623 624 static void XMLCALL 625 metaStartCdataSection(void *userData) { 626 XML_Parser parser = (XML_Parser)userData; 627 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 628 FILE *fp = data->fp; 629 fputts(T("<startcdata"), fp); 630 metaLocation(parser); 631 fputts(T("/>\n"), fp); 632 } 633 634 static void XMLCALL 635 metaEndCdataSection(void *userData) { 636 XML_Parser parser = (XML_Parser)userData; 637 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 638 FILE *fp = data->fp; 639 fputts(T("<endcdata"), fp); 640 metaLocation(parser); 641 fputts(T("/>\n"), fp); 642 } 643 644 static void XMLCALL 645 metaCharacterData(void *userData, const XML_Char *s, int len) { 646 XML_Parser parser = (XML_Parser)userData; 647 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 648 FILE *fp = data->fp; 649 fputts(T("<chars str=\""), fp); 650 characterData(data, s, len); 651 puttc(T('"'), fp); 652 metaLocation(parser); 653 fputts(T("/>\n"), fp); 654 } 655 656 static void XMLCALL 657 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName, 658 const XML_Char *sysid, const XML_Char *pubid, 659 int has_internal_subset) { 660 XML_Parser parser = (XML_Parser)userData; 661 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 662 FILE *fp = data->fp; 663 UNUSED_P(sysid); 664 UNUSED_P(pubid); 665 UNUSED_P(has_internal_subset); 666 ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName); 667 metaLocation(parser); 668 fputts(T("/>\n"), fp); 669 } 670 671 static void XMLCALL 672 metaEndDoctypeDecl(void *userData) { 673 XML_Parser parser = (XML_Parser)userData; 674 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 675 FILE *fp = data->fp; 676 fputts(T("<enddoctype"), fp); 677 metaLocation(parser); 678 fputts(T("/>\n"), fp); 679 } 680 681 static void XMLCALL 682 metaNotationDecl(void *userData, const XML_Char *notationName, 683 const XML_Char *base, const XML_Char *systemId, 684 const XML_Char *publicId) { 685 XML_Parser parser = (XML_Parser)userData; 686 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 687 FILE *fp = data->fp; 688 UNUSED_P(base); 689 ftprintf(fp, T("<notation name=\"%s\""), notationName); 690 if (publicId) 691 ftprintf(fp, T(" public=\"%s\""), publicId); 692 if (systemId) { 693 fputts(T(" system=\""), fp); 694 characterData(data, systemId, (int)tcslen(systemId)); 695 puttc(T('"'), fp); 696 } 697 metaLocation(parser); 698 fputts(T("/>\n"), fp); 699 } 700 701 static void XMLCALL 702 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param, 703 const XML_Char *value, int value_length, const XML_Char *base, 704 const XML_Char *systemId, const XML_Char *publicId, 705 const XML_Char *notationName) { 706 XML_Parser parser = (XML_Parser)userData; 707 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 708 FILE *fp = data->fp; 709 710 UNUSED_P(is_param); 711 UNUSED_P(base); 712 if (value) { 713 ftprintf(fp, T("<entity name=\"%s\""), entityName); 714 metaLocation(parser); 715 puttc(T('>'), fp); 716 characterData(data, value, value_length); 717 fputts(T("</entity/>\n"), fp); 718 } else if (notationName) { 719 ftprintf(fp, T("<entity name=\"%s\""), entityName); 720 if (publicId) 721 ftprintf(fp, T(" public=\"%s\""), publicId); 722 fputts(T(" system=\""), fp); 723 characterData(data, systemId, (int)tcslen(systemId)); 724 puttc(T('"'), fp); 725 ftprintf(fp, T(" notation=\"%s\""), notationName); 726 metaLocation(parser); 727 fputts(T("/>\n"), fp); 728 } else { 729 ftprintf(fp, T("<entity name=\"%s\""), entityName); 730 if (publicId) 731 ftprintf(fp, T(" public=\"%s\""), publicId); 732 fputts(T(" system=\""), fp); 733 characterData(data, systemId, (int)tcslen(systemId)); 734 puttc(T('"'), fp); 735 metaLocation(parser); 736 fputts(T("/>\n"), fp); 737 } 738 } 739 740 static void XMLCALL 741 metaStartNamespaceDecl(void *userData, const XML_Char *prefix, 742 const XML_Char *uri) { 743 XML_Parser parser = (XML_Parser)userData; 744 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 745 FILE *fp = data->fp; 746 fputts(T("<startns"), fp); 747 if (prefix) 748 ftprintf(fp, T(" prefix=\"%s\""), prefix); 749 if (uri) { 750 fputts(T(" ns=\""), fp); 751 characterData(data, uri, (int)tcslen(uri)); 752 fputts(T("\"/>\n"), fp); 753 } else 754 fputts(T("/>\n"), fp); 755 } 756 757 static void XMLCALL 758 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) { 759 XML_Parser parser = (XML_Parser)userData; 760 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 761 FILE *fp = data->fp; 762 if (! prefix) 763 fputts(T("<endns/>\n"), fp); 764 else 765 ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix); 766 } 767 768 static int XMLCALL 769 unknownEncodingConvert(void *data, const char *p) { 770 return codepageConvert(*(int *)data, p); 771 } 772 773 static int XMLCALL 774 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) { 775 int cp; 776 static const XML_Char prefixL[] = T("windows-"); 777 static const XML_Char prefixU[] = T("WINDOWS-"); 778 int i; 779 780 UNUSED_P(userData); 781 for (i = 0; prefixU[i]; i++) 782 if (name[i] != prefixU[i] && name[i] != prefixL[i]) 783 return 0; 784 785 cp = 0; 786 for (; name[i]; i++) { 787 static const XML_Char digits[] = T("0123456789"); 788 const XML_Char *s = tcschr(digits, name[i]); 789 if (! s) 790 return 0; 791 cp *= 10; 792 cp += (int)(s - digits); 793 if (cp >= 0x10000) 794 return 0; 795 } 796 if (! codepageMap(cp, info->map)) 797 return 0; 798 info->convert = unknownEncodingConvert; 799 /* We could just cast the code page integer to a void *, 800 and avoid the use of release. */ 801 info->release = free; 802 info->data = malloc(sizeof(int)); 803 if (! info->data) 804 return 0; 805 *(int *)info->data = cp; 806 return 1; 807 } 808 809 static int XMLCALL 810 notStandalone(void *userData) { 811 UNUSED_P(userData); 812 return 0; 813 } 814 815 static void 816 showVersion(XML_Char *prog) { 817 XML_Char *s = prog; 818 XML_Char ch; 819 const XML_Feature *features = XML_GetFeatureList(); 820 while ((ch = *s) != 0) { 821 if (ch == '/' 822 #if defined(_WIN32) 823 || ch == '\\' 824 #endif 825 ) 826 prog = s + 1; 827 ++s; 828 } 829 ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion()); 830 if (features != NULL && features[0].feature != XML_FEATURE_END) { 831 int i = 1; 832 ftprintf(stdout, T("%s"), features[0].name); 833 if (features[0].value) 834 ftprintf(stdout, T("=%ld"), features[0].value); 835 while (features[i].feature != XML_FEATURE_END) { 836 ftprintf(stdout, T(", %s"), features[i].name); 837 if (features[i].value) 838 ftprintf(stdout, T("=%ld"), features[i].value); 839 ++i; 840 } 841 ftprintf(stdout, T("\n")); 842 } 843 } 844 845 static void 846 usage(const XML_Char *prog, int rc) { 847 ftprintf( 848 stderr, 849 /* Generated with: 850 * $ xmlwf/xmlwf_helpgen.sh 851 */ 852 /* clang-format off */ 853 T("usage: %s [-s] [-n] [-p] [-x] [-e ENCODING] [-w] [-r] [-d DIRECTORY]\n") 854 T(" [-c | -m | -t] [-N]\n") 855 T(" [FILE [FILE ...]]\n") 856 T("\n") 857 T("xmlwf - Determines if an XML document is well-formed\n") 858 T("\n") 859 T("positional arguments:\n") 860 T(" FILE files to process (default: STDIN)\n") 861 T("\n") 862 T("input control arguments:\n") 863 T(" -s print an error if the document is not [s]tandalone\n") 864 T(" -n enable [n]amespace processing\n") 865 T(" -p enable processing external DTDs and [p]arameter entities\n") 866 T(" -x enable processing of e[x]ternal entities\n") 867 T(" -e ENCODING override any in-document [e]ncoding declaration\n") 868 T(" -w enable support for [W]indows code pages\n") 869 T(" -r disable memory-mapping and use normal file [r]ead IO calls instead\n") 870 T("\n") 871 T("output control arguments:\n") 872 T(" -d DIRECTORY output [d]estination directory\n") 873 T(" -c write a [c]opy of input XML, not canonical XML\n") 874 T(" -m write [m]eta XML, not canonical XML\n") 875 T(" -t write no XML output for [t]iming of plain parsing\n") 876 T(" -N enable adding doctype and [n]otation declarations\n") 877 T("\n") 878 T("info arguments:\n") 879 T(" -h show this [h]elp message and exit\n") 880 T(" -v show program's [v]ersion number and exit\n") 881 T("\n") 882 T("libexpat is software libre, licensed under the MIT license.\n") 883 T("Please report bugs at https://github.com/libexpat/libexpat/issues. Thank you!\n") 884 , /* clang-format on */ 885 prog); 886 exit(rc); 887 } 888 889 #if defined(__MINGW32__) && defined(XML_UNICODE) 890 /* Silence warning about missing prototype */ 891 int wmain(int argc, XML_Char **argv); 892 #endif 893 894 int 895 tmain(int argc, XML_Char **argv) { 896 int i, j; 897 const XML_Char *outputDir = NULL; 898 const XML_Char *encoding = NULL; 899 unsigned processFlags = XML_MAP_FILE; 900 int windowsCodePages = 0; 901 int outputType = 0; 902 int useNamespaces = 0; 903 int requireStandalone = 0; 904 int requiresNotations = 0; 905 enum XML_ParamEntityParsing paramEntityParsing 906 = XML_PARAM_ENTITY_PARSING_NEVER; 907 int useStdin = 0; 908 XmlwfUserData userData = {NULL, NULL, NULL}; 909 910 #ifdef _MSC_VER 911 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); 912 #endif 913 914 i = 1; 915 j = 0; 916 while (i < argc) { 917 if (j == 0) { 918 if (argv[i][0] != T('-')) 919 break; 920 if (argv[i][1] == T('-') && argv[i][2] == T('\0')) { 921 i++; 922 break; 923 } 924 j++; 925 } 926 switch (argv[i][j]) { 927 case T('r'): 928 processFlags &= ~XML_MAP_FILE; 929 j++; 930 break; 931 case T('s'): 932 requireStandalone = 1; 933 j++; 934 break; 935 case T('n'): 936 useNamespaces = 1; 937 j++; 938 break; 939 case T('p'): 940 paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS; 941 /* fall through */ 942 case T('x'): 943 processFlags |= XML_EXTERNAL_ENTITIES; 944 j++; 945 break; 946 case T('w'): 947 windowsCodePages = 1; 948 j++; 949 break; 950 case T('m'): 951 outputType = 'm'; 952 j++; 953 break; 954 case T('c'): 955 outputType = 'c'; 956 useNamespaces = 0; 957 j++; 958 break; 959 case T('t'): 960 outputType = 't'; 961 j++; 962 break; 963 case T('N'): 964 requiresNotations = 1; 965 j++; 966 break; 967 case T('d'): 968 if (argv[i][j + 1] == T('\0')) { 969 if (++i == argc) 970 usage(argv[0], 2); 971 outputDir = argv[i]; 972 } else 973 outputDir = argv[i] + j + 1; 974 i++; 975 j = 0; 976 break; 977 case T('e'): 978 if (argv[i][j + 1] == T('\0')) { 979 if (++i == argc) 980 usage(argv[0], 2); 981 encoding = argv[i]; 982 } else 983 encoding = argv[i] + j + 1; 984 i++; 985 j = 0; 986 break; 987 case T('h'): 988 usage(argv[0], 0); 989 return 0; 990 case T('v'): 991 showVersion(argv[0]); 992 return 0; 993 case T('\0'): 994 if (j > 1) { 995 i++; 996 j = 0; 997 break; 998 } 999 /* fall through */ 1000 default: 1001 usage(argv[0], 2); 1002 } 1003 } 1004 if (i == argc) { 1005 useStdin = 1; 1006 processFlags &= ~XML_MAP_FILE; 1007 i--; 1008 } 1009 for (; i < argc; i++) { 1010 XML_Char *outName = 0; 1011 int result; 1012 XML_Parser parser; 1013 if (useNamespaces) 1014 parser = XML_ParserCreateNS(encoding, NSSEP); 1015 else 1016 parser = XML_ParserCreate(encoding); 1017 1018 if (! parser) { 1019 tperror(T("Could not instantiate parser")); 1020 exit(1); 1021 } 1022 1023 if (requireStandalone) 1024 XML_SetNotStandaloneHandler(parser, notStandalone); 1025 XML_SetParamEntityParsing(parser, paramEntityParsing); 1026 if (outputType == 't') { 1027 /* This is for doing timings; this gives a more realistic estimate of 1028 the parsing time. */ 1029 outputDir = 0; 1030 XML_SetElementHandler(parser, nopStartElement, nopEndElement); 1031 XML_SetCharacterDataHandler(parser, nopCharacterData); 1032 XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction); 1033 } else if (outputDir) { 1034 const XML_Char *delim = T("/"); 1035 const XML_Char *file = useStdin ? T("STDIN") : argv[i]; 1036 if (! useStdin) { 1037 /* Jump after last (back)slash */ 1038 const XML_Char *lastDelim = tcsrchr(file, delim[0]); 1039 if (lastDelim) 1040 file = lastDelim + 1; 1041 #if defined(_WIN32) 1042 else { 1043 const XML_Char *winDelim = T("\\"); 1044 lastDelim = tcsrchr(file, winDelim[0]); 1045 if (lastDelim) { 1046 file = lastDelim + 1; 1047 delim = winDelim; 1048 } 1049 } 1050 #endif 1051 } 1052 outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2) 1053 * sizeof(XML_Char)); 1054 tcscpy(outName, outputDir); 1055 tcscat(outName, delim); 1056 tcscat(outName, file); 1057 userData.fp = tfopen(outName, T("wb")); 1058 if (! userData.fp) { 1059 tperror(outName); 1060 exit(1); 1061 } 1062 setvbuf(userData.fp, NULL, _IOFBF, 16384); 1063 #ifdef XML_UNICODE 1064 puttc(0xFEFF, userData.fp); 1065 #endif 1066 XML_SetUserData(parser, &userData); 1067 switch (outputType) { 1068 case 'm': 1069 XML_UseParserAsHandlerArg(parser); 1070 XML_SetElementHandler(parser, metaStartElement, metaEndElement); 1071 XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction); 1072 XML_SetCommentHandler(parser, metaComment); 1073 XML_SetCdataSectionHandler(parser, metaStartCdataSection, 1074 metaEndCdataSection); 1075 XML_SetCharacterDataHandler(parser, metaCharacterData); 1076 XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl, 1077 metaEndDoctypeDecl); 1078 XML_SetEntityDeclHandler(parser, metaEntityDecl); 1079 XML_SetNotationDeclHandler(parser, metaNotationDecl); 1080 XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl, 1081 metaEndNamespaceDecl); 1082 metaStartDocument(parser); 1083 break; 1084 case 'c': 1085 XML_UseParserAsHandlerArg(parser); 1086 XML_SetDefaultHandler(parser, markup); 1087 XML_SetElementHandler(parser, defaultStartElement, defaultEndElement); 1088 XML_SetCharacterDataHandler(parser, defaultCharacterData); 1089 XML_SetProcessingInstructionHandler(parser, 1090 defaultProcessingInstruction); 1091 break; 1092 default: 1093 if (useNamespaces) 1094 XML_SetElementHandler(parser, startElementNS, endElementNS); 1095 else 1096 XML_SetElementHandler(parser, startElement, endElement); 1097 XML_SetCharacterDataHandler(parser, characterData); 1098 #ifndef W3C14N 1099 XML_SetProcessingInstructionHandler(parser, processingInstruction); 1100 if (requiresNotations) { 1101 XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl); 1102 XML_SetNotationDeclHandler(parser, notationDecl); 1103 } 1104 #endif /* not W3C14N */ 1105 break; 1106 } 1107 } 1108 if (windowsCodePages) 1109 XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0); 1110 result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags); 1111 if (outputDir) { 1112 if (outputType == 'm') 1113 metaEndDocument(parser); 1114 fclose(userData.fp); 1115 if (! result) { 1116 tremove(outName); 1117 } 1118 free(outName); 1119 } 1120 XML_ParserFree(parser); 1121 if (! result) { 1122 exit(2); 1123 } 1124 } 1125 return 0; 1126 } 1127