1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000-2017 Expat development team 11 Licensed under the MIT license: 12 13 Permission is hereby granted, free of charge, to any person obtaining 14 a copy of this software and associated documentation files (the 15 "Software"), to deal in the Software without restriction, including 16 without limitation the rights to use, copy, modify, merge, publish, 17 distribute, sublicense, and/or sell copies of the Software, and to permit 18 persons to whom the Software is furnished to do so, subject to the 19 following conditions: 20 21 The above copyright notice and this permission notice shall be included 22 in all copies or substantial portions of the Software. 23 24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 30 USE OR OTHER DEALINGS IN THE SOFTWARE. 31 */ 32 33 #include <assert.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <stddef.h> 37 #include <string.h> 38 39 #include "expat.h" 40 #include "codepage.h" 41 #include "internal.h" /* for UNUSED_P only */ 42 #include "xmlfile.h" 43 #include "xmltchar.h" 44 45 #ifdef _MSC_VER 46 # include <crtdbg.h> 47 #endif 48 49 #ifdef XML_UNICODE 50 # include <wchar.h> 51 #endif 52 53 /* Structures for handler user data */ 54 typedef struct NotationList { 55 struct NotationList *next; 56 const XML_Char *notationName; 57 const XML_Char *systemId; 58 const XML_Char *publicId; 59 } NotationList; 60 61 typedef struct xmlwfUserData { 62 FILE *fp; 63 NotationList *notationListHead; 64 const XML_Char *currentDoctypeName; 65 } XmlwfUserData; 66 67 68 /* This ensures proper sorting. */ 69 70 #define NSSEP T('\001') 71 72 static void XMLCALL 73 characterData(void *userData, const XML_Char *s, int len) 74 { 75 FILE *fp = ((XmlwfUserData *)userData)->fp; 76 for (; len > 0; --len, ++s) { 77 switch (*s) { 78 case T('&'): 79 fputts(T("&"), fp); 80 break; 81 case T('<'): 82 fputts(T("<"), fp); 83 break; 84 case T('>'): 85 fputts(T(">"), fp); 86 break; 87 #ifdef W3C14N 88 case 13: 89 fputts(T("
"), fp); 90 break; 91 #else 92 case T('"'): 93 fputts(T("""), fp); 94 break; 95 case 9: 96 case 10: 97 case 13: 98 ftprintf(fp, T("&#%d;"), *s); 99 break; 100 #endif 101 default: 102 puttc(*s, fp); 103 break; 104 } 105 } 106 } 107 108 static void 109 attributeValue(FILE *fp, const XML_Char *s) 110 { 111 puttc(T('='), fp); 112 puttc(T('"'), fp); 113 assert(s); 114 for (;;) { 115 switch (*s) { 116 case 0: 117 case NSSEP: 118 puttc(T('"'), fp); 119 return; 120 case T('&'): 121 fputts(T("&"), fp); 122 break; 123 case T('<'): 124 fputts(T("<"), fp); 125 break; 126 case T('"'): 127 fputts(T("""), fp); 128 break; 129 #ifdef W3C14N 130 case 9: 131 fputts(T("	"), fp); 132 break; 133 case 10: 134 fputts(T("
"), fp); 135 break; 136 case 13: 137 fputts(T("
"), fp); 138 break; 139 #else 140 case T('>'): 141 fputts(T(">"), fp); 142 break; 143 case 9: 144 case 10: 145 case 13: 146 ftprintf(fp, T("&#%d;"), *s); 147 break; 148 #endif 149 default: 150 puttc(*s, fp); 151 break; 152 } 153 s++; 154 } 155 } 156 157 /* Lexicographically comparing UTF-8 encoded attribute values, 158 is equivalent to lexicographically comparing based on the character number. */ 159 160 static int 161 attcmp(const void *att1, const void *att2) 162 { 163 return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2); 164 } 165 166 static void XMLCALL 167 startElement(void *userData, const XML_Char *name, const XML_Char **atts) 168 { 169 int nAtts; 170 const XML_Char **p; 171 FILE *fp = ((XmlwfUserData *)userData)->fp; 172 puttc(T('<'), fp); 173 fputts(name, fp); 174 175 p = atts; 176 while (*p) 177 ++p; 178 nAtts = (int)((p - atts) >> 1); 179 if (nAtts > 1) 180 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp); 181 while (*atts) { 182 puttc(T(' '), fp); 183 fputts(*atts++, fp); 184 attributeValue(fp, *atts); 185 atts++; 186 } 187 puttc(T('>'), fp); 188 } 189 190 static void XMLCALL 191 endElement(void *userData, const XML_Char *name) 192 { 193 FILE *fp = ((XmlwfUserData *)userData)->fp; 194 puttc(T('<'), fp); 195 puttc(T('/'), fp); 196 fputts(name, fp); 197 puttc(T('>'), fp); 198 } 199 200 static int 201 nsattcmp(const void *p1, const void *p2) 202 { 203 const XML_Char *att1 = *(const XML_Char **)p1; 204 const XML_Char *att2 = *(const XML_Char **)p2; 205 int sep1 = (tcsrchr(att1, NSSEP) != 0); 206 int sep2 = (tcsrchr(att1, NSSEP) != 0); 207 if (sep1 != sep2) 208 return sep1 - sep2; 209 return tcscmp(att1, att2); 210 } 211 212 static void XMLCALL 213 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) 214 { 215 int nAtts; 216 int nsi; 217 const XML_Char **p; 218 FILE *fp = ((XmlwfUserData *)userData)->fp; 219 const XML_Char *sep; 220 puttc(T('<'), fp); 221 222 sep = tcsrchr(name, NSSEP); 223 if (sep) { 224 fputts(T("n1:"), fp); 225 fputts(sep + 1, fp); 226 fputts(T(" xmlns:n1"), fp); 227 attributeValue(fp, name); 228 nsi = 2; 229 } 230 else { 231 fputts(name, fp); 232 nsi = 1; 233 } 234 235 p = atts; 236 while (*p) 237 ++p; 238 nAtts = (int)((p - atts) >> 1); 239 if (nAtts > 1) 240 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp); 241 while (*atts) { 242 name = *atts++; 243 sep = tcsrchr(name, NSSEP); 244 puttc(T(' '), fp); 245 if (sep) { 246 ftprintf(fp, T("n%d:"), nsi); 247 fputts(sep + 1, fp); 248 } 249 else 250 fputts(name, fp); 251 attributeValue(fp, *atts); 252 if (sep) { 253 ftprintf(fp, T(" xmlns:n%d"), nsi++); 254 attributeValue(fp, name); 255 } 256 atts++; 257 } 258 puttc(T('>'), fp); 259 } 260 261 static void XMLCALL 262 endElementNS(void *userData, const XML_Char *name) 263 { 264 FILE *fp = ((XmlwfUserData *)userData)->fp; 265 const XML_Char *sep; 266 puttc(T('<'), fp); 267 puttc(T('/'), fp); 268 sep = tcsrchr(name, NSSEP); 269 if (sep) { 270 fputts(T("n1:"), fp); 271 fputts(sep + 1, fp); 272 } 273 else 274 fputts(name, fp); 275 puttc(T('>'), fp); 276 } 277 278 #ifndef W3C14N 279 280 static void XMLCALL 281 processingInstruction(void *userData, const XML_Char *target, 282 const XML_Char *data) 283 { 284 FILE *fp = ((XmlwfUserData *)userData)->fp; 285 puttc(T('<'), fp); 286 puttc(T('?'), fp); 287 fputts(target, fp); 288 puttc(T(' '), fp); 289 fputts(data, fp); 290 puttc(T('?'), fp); 291 puttc(T('>'), fp); 292 } 293 294 295 static XML_Char *xcsdup(const XML_Char *s) 296 { 297 XML_Char *result; 298 int count = 0; 299 int numBytes; 300 301 /* Get the length of the string, including terminator */ 302 while (s[count++] != 0) { 303 /* Do nothing */ 304 } 305 numBytes = count * sizeof(XML_Char); 306 result = malloc(numBytes); 307 if (result == NULL) 308 return NULL; 309 memcpy(result, s, numBytes); 310 return result; 311 } 312 313 static void XMLCALL 314 startDoctypeDecl(void *userData, 315 const XML_Char *doctypeName, 316 const XML_Char *UNUSED_P(sysid), 317 const XML_Char *UNUSED_P(publid), 318 int UNUSED_P(has_internal_subset)) 319 { 320 XmlwfUserData *data = (XmlwfUserData *)userData; 321 data->currentDoctypeName = xcsdup(doctypeName); 322 } 323 324 static void 325 freeNotations(XmlwfUserData *data) 326 { 327 NotationList *notationListHead = data->notationListHead; 328 329 while (notationListHead != NULL) { 330 NotationList *next = notationListHead->next; 331 free((void *)notationListHead->notationName); 332 free((void *)notationListHead->systemId); 333 free((void *)notationListHead->publicId); 334 free(notationListHead); 335 notationListHead = next; 336 } 337 data->notationListHead = NULL; 338 } 339 340 static int xcscmp(const XML_Char *xs, const XML_Char *xt) 341 { 342 while (*xs != 0 && *xt != 0) { 343 if (*xs < *xt) 344 return -1; 345 if (*xs > *xt) 346 return 1; 347 xs++; 348 xt++; 349 } 350 if (*xs < *xt) 351 return -1; 352 if (*xs > *xt) 353 return 1; 354 return 0; 355 } 356 357 static int 358 notationCmp(const void *a, const void *b) 359 { 360 const NotationList * const n1 = *(NotationList **)a; 361 const NotationList * const n2 = *(NotationList **)b; 362 363 return xcscmp(n1->notationName, n2->notationName); 364 } 365 366 static void XMLCALL 367 endDoctypeDecl(void *userData) 368 { 369 XmlwfUserData *data = (XmlwfUserData *)userData; 370 NotationList **notations; 371 int notationCount = 0; 372 NotationList *p; 373 int i; 374 375 /* How many notations do we have? */ 376 for (p = data->notationListHead; p != NULL; p = p->next) 377 notationCount++; 378 if (notationCount == 0) { 379 /* Nothing to report */ 380 free((void *)data->currentDoctypeName); 381 data->currentDoctypeName = NULL; 382 return; 383 } 384 385 notations = malloc(notationCount * sizeof(NotationList *)); 386 if (notations == NULL) { 387 fprintf(stderr, "Unable to sort notations"); 388 freeNotations(data); 389 return; 390 } 391 392 for (p = data->notationListHead, i = 0; 393 i < notationCount; 394 p = p->next, i++) { 395 notations[i] = p; 396 } 397 qsort(notations, notationCount, sizeof(NotationList *), notationCmp); 398 399 /* Output the DOCTYPE header */ 400 fputts(T("<!DOCTYPE "), data->fp); 401 fputts(data->currentDoctypeName, data->fp); 402 fputts(T(" [\n"), data->fp); 403 404 /* Now the NOTATIONs */ 405 for (i = 0; i < notationCount; i++) { 406 fputts(T("<!NOTATION "), data->fp); 407 fputts(notations[i]->notationName, data->fp); 408 if (notations[i]->publicId != NULL) { 409 fputts(T(" PUBLIC '"), data->fp); 410 fputts(notations[i]->publicId, data->fp); 411 puttc(T('\''), data->fp); 412 if (notations[i]->systemId != NULL) { 413 puttc(T(' '), data->fp); 414 puttc(T('\''), data->fp); 415 fputts(notations[i]->systemId, data->fp); 416 puttc(T('\''), data->fp); 417 } 418 } 419 else if (notations[i]->systemId != NULL) { 420 fputts(T(" SYSTEM '"), data->fp); 421 fputts(notations[i]->systemId, data->fp); 422 puttc(T('\''), data->fp); 423 } 424 puttc(T('>'), data->fp); 425 puttc(T('\n'), data->fp); 426 } 427 428 /* Finally end the DOCTYPE */ 429 fputts(T("]>\n"), data->fp); 430 431 free(notations); 432 freeNotations(data); 433 free((void *)data->currentDoctypeName); 434 data->currentDoctypeName = NULL; 435 } 436 437 static void XMLCALL 438 notationDecl(void *userData, 439 const XML_Char *notationName, 440 const XML_Char *UNUSED_P(base), 441 const XML_Char *systemId, 442 const XML_Char *publicId) 443 { 444 XmlwfUserData *data = (XmlwfUserData *)userData; 445 NotationList *entry = malloc(sizeof(NotationList)); 446 const char *errorMessage = "Unable to store NOTATION for output\n"; 447 448 if (entry == NULL) { 449 fputs(errorMessage, stderr); 450 return; /* Nothing we can really do about this */ 451 } 452 entry->notationName = xcsdup(notationName); 453 if (entry->notationName == NULL) { 454 fputs(errorMessage, stderr); 455 free(entry); 456 return; 457 } 458 if (systemId != NULL) { 459 entry->systemId = xcsdup(systemId); 460 if (entry->systemId == NULL) { 461 fputs(errorMessage, stderr); 462 free((void *)entry->notationName); 463 free(entry); 464 return; 465 } 466 } 467 else { 468 entry->systemId = NULL; 469 } 470 if (publicId != NULL) { 471 entry->publicId = xcsdup(publicId); 472 if (entry->publicId == NULL) { 473 fputs(errorMessage, stderr); 474 free((void *)entry->systemId); /* Safe if it's NULL */ 475 free((void *)entry->notationName); 476 free(entry); 477 return; 478 } 479 } 480 else { 481 entry->publicId = NULL; 482 } 483 484 entry->next = data->notationListHead; 485 data->notationListHead = entry; 486 } 487 488 #endif /* not W3C14N */ 489 490 static void XMLCALL 491 defaultCharacterData(void *userData, const XML_Char *UNUSED_P(s), int UNUSED_P(len)) 492 { 493 XML_DefaultCurrent((XML_Parser) userData); 494 } 495 496 static void XMLCALL 497 defaultStartElement(void *userData, const XML_Char *UNUSED_P(name), 498 const XML_Char **UNUSED_P(atts)) 499 { 500 XML_DefaultCurrent((XML_Parser) userData); 501 } 502 503 static void XMLCALL 504 defaultEndElement(void *userData, const XML_Char *UNUSED_P(name)) 505 { 506 XML_DefaultCurrent((XML_Parser) userData); 507 } 508 509 static void XMLCALL 510 defaultProcessingInstruction(void *userData, const XML_Char *UNUSED_P(target), 511 const XML_Char *UNUSED_P(data)) 512 { 513 XML_DefaultCurrent((XML_Parser) userData); 514 } 515 516 static void XMLCALL 517 nopCharacterData(void *UNUSED_P(userData), const XML_Char *UNUSED_P(s), int UNUSED_P(len)) 518 { 519 } 520 521 static void XMLCALL 522 nopStartElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts)) 523 { 524 } 525 526 static void XMLCALL 527 nopEndElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name)) 528 { 529 } 530 531 static void XMLCALL 532 nopProcessingInstruction(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), 533 const XML_Char *UNUSED_P(data)) 534 { 535 } 536 537 static void XMLCALL 538 markup(void *userData, const XML_Char *s, int len) 539 { 540 FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp; 541 for (; len > 0; --len, ++s) 542 puttc(*s, fp); 543 } 544 545 static void 546 metaLocation(XML_Parser parser) 547 { 548 const XML_Char *uri = XML_GetBase(parser); 549 FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp; 550 if (uri) 551 ftprintf(fp, T(" uri=\"%s\""), uri); 552 ftprintf(fp, 553 T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") 554 T(" nbytes=\"%d\"") 555 T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") 556 T(" col=\"%") T(XML_FMT_INT_MOD) T("u\""), 557 XML_GetCurrentByteIndex(parser), 558 XML_GetCurrentByteCount(parser), 559 XML_GetCurrentLineNumber(parser), 560 XML_GetCurrentColumnNumber(parser)); 561 } 562 563 static void 564 metaStartDocument(void *userData) 565 { 566 fputts(T("<document>\n"), 567 ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp); 568 } 569 570 static void 571 metaEndDocument(void *userData) 572 { 573 fputts(T("</document>\n"), 574 ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp); 575 } 576 577 static void XMLCALL 578 metaStartElement(void *userData, const XML_Char *name, 579 const XML_Char **atts) 580 { 581 XML_Parser parser = (XML_Parser) userData; 582 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 583 FILE *fp = data->fp; 584 const XML_Char **specifiedAttsEnd 585 = atts + XML_GetSpecifiedAttributeCount(parser); 586 const XML_Char **idAttPtr; 587 int idAttIndex = XML_GetIdAttributeIndex(parser); 588 if (idAttIndex < 0) 589 idAttPtr = 0; 590 else 591 idAttPtr = atts + idAttIndex; 592 593 ftprintf(fp, T("<starttag name=\"%s\""), name); 594 metaLocation(parser); 595 if (*atts) { 596 fputts(T(">\n"), fp); 597 do { 598 ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]); 599 characterData(data, atts[1], (int)tcslen(atts[1])); 600 if (atts >= specifiedAttsEnd) 601 fputts(T("\" defaulted=\"yes\"/>\n"), fp); 602 else if (atts == idAttPtr) 603 fputts(T("\" id=\"yes\"/>\n"), fp); 604 else 605 fputts(T("\"/>\n"), fp); 606 } while (*(atts += 2)); 607 fputts(T("</starttag>\n"), fp); 608 } 609 else 610 fputts(T("/>\n"), fp); 611 } 612 613 static void XMLCALL 614 metaEndElement(void *userData, const XML_Char *name) 615 { 616 XML_Parser parser = (XML_Parser) userData; 617 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 618 FILE *fp = data->fp; 619 ftprintf(fp, T("<endtag name=\"%s\""), name); 620 metaLocation(parser); 621 fputts(T("/>\n"), fp); 622 } 623 624 static void XMLCALL 625 metaProcessingInstruction(void *userData, const XML_Char *target, 626 const XML_Char *data) 627 { 628 XML_Parser parser = (XML_Parser) userData; 629 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser); 630 FILE *fp = usrData->fp; 631 ftprintf(fp, T("<pi target=\"%s\" data=\""), target); 632 characterData(usrData, data, (int)tcslen(data)); 633 puttc(T('"'), fp); 634 metaLocation(parser); 635 fputts(T("/>\n"), fp); 636 } 637 638 static void XMLCALL 639 metaComment(void *userData, const XML_Char *data) 640 { 641 XML_Parser parser = (XML_Parser) userData; 642 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser); 643 FILE *fp = usrData->fp; 644 fputts(T("<comment data=\""), fp); 645 characterData(usrData, data, (int)tcslen(data)); 646 puttc(T('"'), fp); 647 metaLocation(parser); 648 fputts(T("/>\n"), fp); 649 } 650 651 static void XMLCALL 652 metaStartCdataSection(void *userData) 653 { 654 XML_Parser parser = (XML_Parser) userData; 655 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 656 FILE *fp = data->fp; 657 fputts(T("<startcdata"), fp); 658 metaLocation(parser); 659 fputts(T("/>\n"), fp); 660 } 661 662 static void XMLCALL 663 metaEndCdataSection(void *userData) 664 { 665 XML_Parser parser = (XML_Parser) userData; 666 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 667 FILE *fp = data->fp; 668 fputts(T("<endcdata"), fp); 669 metaLocation(parser); 670 fputts(T("/>\n"), fp); 671 } 672 673 static void XMLCALL 674 metaCharacterData(void *userData, const XML_Char *s, int len) 675 { 676 XML_Parser parser = (XML_Parser) userData; 677 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 678 FILE *fp = data->fp; 679 fputts(T("<chars str=\""), fp); 680 characterData(data, s, len); 681 puttc(T('"'), fp); 682 metaLocation(parser); 683 fputts(T("/>\n"), fp); 684 } 685 686 static void XMLCALL 687 metaStartDoctypeDecl(void *userData, 688 const XML_Char *doctypeName, 689 const XML_Char *UNUSED_P(sysid), 690 const XML_Char *UNUSED_P(pubid), 691 int UNUSED_P(has_internal_subset)) 692 { 693 XML_Parser parser = (XML_Parser) userData; 694 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 695 FILE *fp = data->fp; 696 ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName); 697 metaLocation(parser); 698 fputts(T("/>\n"), fp); 699 } 700 701 static void XMLCALL 702 metaEndDoctypeDecl(void *userData) 703 { 704 XML_Parser parser = (XML_Parser) userData; 705 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 706 FILE *fp = data->fp; 707 fputts(T("<enddoctype"), fp); 708 metaLocation(parser); 709 fputts(T("/>\n"), fp); 710 } 711 712 static void XMLCALL 713 metaNotationDecl(void *userData, 714 const XML_Char *notationName, 715 const XML_Char *UNUSED_P(base), 716 const XML_Char *systemId, 717 const XML_Char *publicId) 718 { 719 XML_Parser parser = (XML_Parser) userData; 720 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 721 FILE *fp = data->fp; 722 ftprintf(fp, T("<notation name=\"%s\""), notationName); 723 if (publicId) 724 ftprintf(fp, T(" public=\"%s\""), publicId); 725 if (systemId) { 726 fputts(T(" system=\""), fp); 727 characterData(data, systemId, (int)tcslen(systemId)); 728 puttc(T('"'), fp); 729 } 730 metaLocation(parser); 731 fputts(T("/>\n"), fp); 732 } 733 734 735 static void XMLCALL 736 metaEntityDecl(void *userData, 737 const XML_Char *entityName, 738 int UNUSED_P(is_param), 739 const XML_Char *value, 740 int value_length, 741 const XML_Char *UNUSED_P(base), 742 const XML_Char *systemId, 743 const XML_Char *publicId, 744 const XML_Char *notationName) 745 { 746 XML_Parser parser = (XML_Parser) userData; 747 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 748 FILE *fp = data->fp; 749 750 if (value) { 751 ftprintf(fp, T("<entity name=\"%s\""), entityName); 752 metaLocation(parser); 753 puttc(T('>'), fp); 754 characterData(data, value, value_length); 755 fputts(T("</entity/>\n"), fp); 756 } 757 else if (notationName) { 758 ftprintf(fp, T("<entity name=\"%s\""), entityName); 759 if (publicId) 760 ftprintf(fp, T(" public=\"%s\""), publicId); 761 fputts(T(" system=\""), fp); 762 characterData(data, systemId, (int)tcslen(systemId)); 763 puttc(T('"'), fp); 764 ftprintf(fp, T(" notation=\"%s\""), notationName); 765 metaLocation(parser); 766 fputts(T("/>\n"), fp); 767 } 768 else { 769 ftprintf(fp, T("<entity name=\"%s\""), entityName); 770 if (publicId) 771 ftprintf(fp, T(" public=\"%s\""), publicId); 772 fputts(T(" system=\""), fp); 773 characterData(data, systemId, (int)tcslen(systemId)); 774 puttc(T('"'), fp); 775 metaLocation(parser); 776 fputts(T("/>\n"), fp); 777 } 778 } 779 780 static void XMLCALL 781 metaStartNamespaceDecl(void *userData, 782 const XML_Char *prefix, 783 const XML_Char *uri) 784 { 785 XML_Parser parser = (XML_Parser) userData; 786 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 787 FILE *fp = data->fp; 788 fputts(T("<startns"), fp); 789 if (prefix) 790 ftprintf(fp, T(" prefix=\"%s\""), prefix); 791 if (uri) { 792 fputts(T(" ns=\""), fp); 793 characterData(data, uri, (int)tcslen(uri)); 794 fputts(T("\"/>\n"), fp); 795 } 796 else 797 fputts(T("/>\n"), fp); 798 } 799 800 static void XMLCALL 801 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) 802 { 803 XML_Parser parser = (XML_Parser) userData; 804 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser); 805 FILE *fp = data->fp; 806 if (!prefix) 807 fputts(T("<endns/>\n"), fp); 808 else 809 ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix); 810 } 811 812 static int XMLCALL 813 unknownEncodingConvert(void *data, const char *p) 814 { 815 return codepageConvert(*(int *)data, p); 816 } 817 818 static int XMLCALL 819 unknownEncoding(void *UNUSED_P(userData), const XML_Char *name, XML_Encoding *info) 820 { 821 int cp; 822 static const XML_Char prefixL[] = T("windows-"); 823 static const XML_Char prefixU[] = T("WINDOWS-"); 824 int i; 825 826 for (i = 0; prefixU[i]; i++) 827 if (name[i] != prefixU[i] && name[i] != prefixL[i]) 828 return 0; 829 830 cp = 0; 831 for (; name[i]; i++) { 832 static const XML_Char digits[] = T("0123456789"); 833 const XML_Char *s = tcschr(digits, name[i]); 834 if (!s) 835 return 0; 836 cp *= 10; 837 cp += (int)(s - digits); 838 if (cp >= 0x10000) 839 return 0; 840 } 841 if (!codepageMap(cp, info->map)) 842 return 0; 843 info->convert = unknownEncodingConvert; 844 /* We could just cast the code page integer to a void *, 845 and avoid the use of release. */ 846 info->release = free; 847 info->data = malloc(sizeof(int)); 848 if (!info->data) 849 return 0; 850 *(int *)info->data = cp; 851 return 1; 852 } 853 854 static int XMLCALL 855 notStandalone(void *UNUSED_P(userData)) 856 { 857 return 0; 858 } 859 860 static void 861 showVersion(XML_Char *prog) 862 { 863 XML_Char *s = prog; 864 XML_Char ch; 865 const XML_Feature *features = XML_GetFeatureList(); 866 while ((ch = *s) != 0) { 867 if (ch == '/' 868 #if defined(_WIN32) 869 || ch == '\\' 870 #endif 871 ) 872 prog = s + 1; 873 ++s; 874 } 875 ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion()); 876 if (features != NULL && features[0].feature != XML_FEATURE_END) { 877 int i = 1; 878 ftprintf(stdout, T("%s"), features[0].name); 879 if (features[0].value) 880 ftprintf(stdout, T("=%ld"), features[0].value); 881 while (features[i].feature != XML_FEATURE_END) { 882 ftprintf(stdout, T(", %s"), features[i].name); 883 if (features[i].value) 884 ftprintf(stdout, T("=%ld"), features[i].value); 885 ++i; 886 } 887 ftprintf(stdout, T("\n")); 888 } 889 } 890 891 static void 892 usage(const XML_Char *prog, int rc) 893 { 894 ftprintf(stderr, 895 T("usage: %s [-s] [-n] [-p] [-x] [-e encoding] [-w] [-d output-dir] [-c] [-m] [-r] [-t] [-N] [file ...]\n"), prog); 896 exit(rc); 897 } 898 899 #if defined(__MINGW32__) && defined(XML_UNICODE) 900 /* Silence warning about missing prototype */ 901 int wmain(int argc, XML_Char **argv); 902 #endif 903 904 int 905 tmain(int argc, XML_Char **argv) 906 { 907 int i, j; 908 const XML_Char *outputDir = NULL; 909 const XML_Char *encoding = NULL; 910 unsigned processFlags = XML_MAP_FILE; 911 int windowsCodePages = 0; 912 int outputType = 0; 913 int useNamespaces = 0; 914 int requireStandalone = 0; 915 int requiresNotations = 0; 916 enum XML_ParamEntityParsing paramEntityParsing = 917 XML_PARAM_ENTITY_PARSING_NEVER; 918 int useStdin = 0; 919 XmlwfUserData userData = { NULL, NULL, NULL }; 920 921 #ifdef _MSC_VER 922 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF); 923 #endif 924 925 i = 1; 926 j = 0; 927 while (i < argc) { 928 if (j == 0) { 929 if (argv[i][0] != T('-')) 930 break; 931 if (argv[i][1] == T('-') && argv[i][2] == T('\0')) { 932 i++; 933 break; 934 } 935 j++; 936 } 937 switch (argv[i][j]) { 938 case T('r'): 939 processFlags &= ~XML_MAP_FILE; 940 j++; 941 break; 942 case T('s'): 943 requireStandalone = 1; 944 j++; 945 break; 946 case T('n'): 947 useNamespaces = 1; 948 j++; 949 break; 950 case T('p'): 951 paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS; 952 /* fall through */ 953 case T('x'): 954 processFlags |= XML_EXTERNAL_ENTITIES; 955 j++; 956 break; 957 case T('w'): 958 windowsCodePages = 1; 959 j++; 960 break; 961 case T('m'): 962 outputType = 'm'; 963 j++; 964 break; 965 case T('c'): 966 outputType = 'c'; 967 useNamespaces = 0; 968 j++; 969 break; 970 case T('t'): 971 outputType = 't'; 972 j++; 973 break; 974 case T('N'): 975 requiresNotations = 1; 976 j++; 977 break; 978 case T('d'): 979 if (argv[i][j + 1] == T('\0')) { 980 if (++i == argc) 981 usage(argv[0], 2); 982 outputDir = argv[i]; 983 } 984 else 985 outputDir = argv[i] + j + 1; 986 i++; 987 j = 0; 988 break; 989 case T('e'): 990 if (argv[i][j + 1] == T('\0')) { 991 if (++i == argc) 992 usage(argv[0], 2); 993 encoding = argv[i]; 994 } 995 else 996 encoding = argv[i] + j + 1; 997 i++; 998 j = 0; 999 break; 1000 case T('h'): 1001 usage(argv[0], 0); 1002 return 0; 1003 case T('v'): 1004 showVersion(argv[0]); 1005 return 0; 1006 case T('\0'): 1007 if (j > 1) { 1008 i++; 1009 j = 0; 1010 break; 1011 } 1012 /* fall through */ 1013 default: 1014 usage(argv[0], 2); 1015 } 1016 } 1017 if (i == argc) { 1018 useStdin = 1; 1019 processFlags &= ~XML_MAP_FILE; 1020 i--; 1021 } 1022 for (; i < argc; i++) { 1023 XML_Char *outName = 0; 1024 int result; 1025 XML_Parser parser; 1026 if (useNamespaces) 1027 parser = XML_ParserCreateNS(encoding, NSSEP); 1028 else 1029 parser = XML_ParserCreate(encoding); 1030 1031 if (! parser) { 1032 tperror(T("Could not instantiate parser")); 1033 exit(1); 1034 } 1035 1036 if (requireStandalone) 1037 XML_SetNotStandaloneHandler(parser, notStandalone); 1038 XML_SetParamEntityParsing(parser, paramEntityParsing); 1039 if (outputType == 't') { 1040 /* This is for doing timings; this gives a more realistic estimate of 1041 the parsing time. */ 1042 outputDir = 0; 1043 XML_SetElementHandler(parser, nopStartElement, nopEndElement); 1044 XML_SetCharacterDataHandler(parser, nopCharacterData); 1045 XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction); 1046 } 1047 else if (outputDir) { 1048 const XML_Char * delim = T("/"); 1049 const XML_Char *file = useStdin ? T("STDIN") : argv[i]; 1050 if (!useStdin) { 1051 /* Jump after last (back)slash */ 1052 const XML_Char * lastDelim = tcsrchr(file, delim[0]); 1053 if (lastDelim) 1054 file = lastDelim + 1; 1055 #if defined(_WIN32) 1056 else { 1057 const XML_Char * winDelim = T("\\"); 1058 lastDelim = tcsrchr(file, winDelim[0]); 1059 if (lastDelim) { 1060 file = lastDelim + 1; 1061 delim = winDelim; 1062 } 1063 } 1064 #endif 1065 } 1066 outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2) 1067 * sizeof(XML_Char)); 1068 tcscpy(outName, outputDir); 1069 tcscat(outName, delim); 1070 tcscat(outName, file); 1071 userData.fp = tfopen(outName, T("wb")); 1072 if (!userData.fp) { 1073 tperror(outName); 1074 exit(1); 1075 } 1076 setvbuf(userData.fp, NULL, _IOFBF, 16384); 1077 #ifdef XML_UNICODE 1078 puttc(0xFEFF, userData.fp); 1079 #endif 1080 XML_SetUserData(parser, &userData); 1081 switch (outputType) { 1082 case 'm': 1083 XML_UseParserAsHandlerArg(parser); 1084 XML_SetElementHandler(parser, metaStartElement, metaEndElement); 1085 XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction); 1086 XML_SetCommentHandler(parser, metaComment); 1087 XML_SetCdataSectionHandler(parser, metaStartCdataSection, 1088 metaEndCdataSection); 1089 XML_SetCharacterDataHandler(parser, metaCharacterData); 1090 XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl, 1091 metaEndDoctypeDecl); 1092 XML_SetEntityDeclHandler(parser, metaEntityDecl); 1093 XML_SetNotationDeclHandler(parser, metaNotationDecl); 1094 XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl, 1095 metaEndNamespaceDecl); 1096 metaStartDocument(parser); 1097 break; 1098 case 'c': 1099 XML_UseParserAsHandlerArg(parser); 1100 XML_SetDefaultHandler(parser, markup); 1101 XML_SetElementHandler(parser, defaultStartElement, defaultEndElement); 1102 XML_SetCharacterDataHandler(parser, defaultCharacterData); 1103 XML_SetProcessingInstructionHandler(parser, 1104 defaultProcessingInstruction); 1105 break; 1106 default: 1107 if (useNamespaces) 1108 XML_SetElementHandler(parser, startElementNS, endElementNS); 1109 else 1110 XML_SetElementHandler(parser, startElement, endElement); 1111 XML_SetCharacterDataHandler(parser, characterData); 1112 #ifndef W3C14N 1113 XML_SetProcessingInstructionHandler(parser, processingInstruction); 1114 if (requiresNotations) { 1115 XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl); 1116 XML_SetNotationDeclHandler(parser, notationDecl); 1117 } 1118 #endif /* not W3C14N */ 1119 break; 1120 } 1121 } 1122 if (windowsCodePages) 1123 XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0); 1124 result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags); 1125 if (outputDir) { 1126 if (outputType == 'm') 1127 metaEndDocument(parser); 1128 fclose(userData.fp); 1129 if (!result) { 1130 tremove(outName); 1131 exit(2); 1132 } 1133 free(outName); 1134 } 1135 XML_ParserFree(parser); 1136 } 1137 return 0; 1138 } 1139