xref: /freebsd/contrib/expat/xmlwf/xmlwf.c (revision a25896ca1270e25b657ceaa8d47d5699515f5c25)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000-2017 Expat development team
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stddef.h>
37 #include <string.h>
38 
39 #include "expat.h"
40 #include "codepage.h"
41 #include "internal.h"  /* for UNUSED_P only */
42 #include "xmlfile.h"
43 #include "xmltchar.h"
44 
45 #ifdef _MSC_VER
46 # include <crtdbg.h>
47 #endif
48 
49 #ifdef XML_UNICODE
50 # include <wchar.h>
51 #endif
52 
53 /* Structures for handler user data */
54 typedef struct NotationList {
55   struct NotationList *next;
56   const XML_Char *notationName;
57   const XML_Char *systemId;
58   const XML_Char *publicId;
59 } NotationList;
60 
61 typedef struct xmlwfUserData {
62   FILE *fp;
63   NotationList *notationListHead;
64   const XML_Char *currentDoctypeName;
65 } XmlwfUserData;
66 
67 
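/* Separator handed to XML_ParserCreateNS(); it sits between the namespace
   URI and the local name in the names the NS handlers receive.  Using this
   value ensures proper sorting. */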
69 
70 #define NSSEP T('\001')
71 
72 static void XMLCALL
73 characterData(void *userData, const XML_Char *s, int len)
74 {
75   FILE *fp = ((XmlwfUserData *)userData)->fp;
76   for (; len > 0; --len, ++s) {
77     switch (*s) {
78     case T('&'):
79       fputts(T("&amp;"), fp);
80       break;
81     case T('<'):
82       fputts(T("&lt;"), fp);
83       break;
84     case T('>'):
85       fputts(T("&gt;"), fp);
86       break;
87 #ifdef W3C14N
88     case 13:
89       fputts(T("&#xD;"), fp);
90       break;
91 #else
92     case T('"'):
93       fputts(T("&quot;"), fp);
94       break;
95     case 9:
96     case 10:
97     case 13:
98       ftprintf(fp, T("&#%d;"), *s);
99       break;
100 #endif
101     default:
102       puttc(*s, fp);
103       break;
104     }
105   }
106 }
107 
108 static void
109 attributeValue(FILE *fp, const XML_Char *s)
110 {
111   puttc(T('='), fp);
112   puttc(T('"'), fp);
113   assert(s);
114   for (;;) {
115     switch (*s) {
116     case 0:
117     case NSSEP:
118       puttc(T('"'), fp);
119       return;
120     case T('&'):
121       fputts(T("&amp;"), fp);
122       break;
123     case T('<'):
124       fputts(T("&lt;"), fp);
125       break;
126     case T('"'):
127       fputts(T("&quot;"), fp);
128       break;
129 #ifdef W3C14N
130     case 9:
131       fputts(T("&#x9;"), fp);
132       break;
133     case 10:
134       fputts(T("&#xA;"), fp);
135       break;
136     case 13:
137       fputts(T("&#xD;"), fp);
138       break;
139 #else
140     case T('>'):
141       fputts(T("&gt;"), fp);
142       break;
143     case 9:
144     case 10:
145     case 13:
146       ftprintf(fp, T("&#%d;"), *s);
147       break;
148 #endif
149     default:
150       puttc(*s, fp);
151       break;
152     }
153     s++;
154   }
155 }
156 
157 /* Lexicographically comparing UTF-8 encoded attribute values,
158 is equivalent to lexicographically comparing based on the character number. */
159 
160 static int
161 attcmp(const void *att1, const void *att2)
162 {
163   return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
164 }
165 
166 static void XMLCALL
167 startElement(void *userData, const XML_Char *name, const XML_Char **atts)
168 {
169   int nAtts;
170   const XML_Char **p;
171   FILE *fp = ((XmlwfUserData *)userData)->fp;
172   puttc(T('<'), fp);
173   fputts(name, fp);
174 
175   p = atts;
176   while (*p)
177     ++p;
178   nAtts = (int)((p - atts) >> 1);
179   if (nAtts > 1)
180     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
181   while (*atts) {
182     puttc(T(' '), fp);
183     fputts(*atts++, fp);
184     attributeValue(fp, *atts);
185     atts++;
186   }
187   puttc(T('>'), fp);
188 }
189 
190 static void XMLCALL
191 endElement(void *userData, const XML_Char *name)
192 {
193   FILE *fp = ((XmlwfUserData *)userData)->fp;
194   puttc(T('<'), fp);
195   puttc(T('/'), fp);
196   fputts(name, fp);
197   puttc(T('>'), fp);
198 }
199 
200 static int
201 nsattcmp(const void *p1, const void *p2)
202 {
203   const XML_Char *att1 = *(const XML_Char **)p1;
204   const XML_Char *att2 = *(const XML_Char **)p2;
205   int sep1 = (tcsrchr(att1, NSSEP) != 0);
206   int sep2 = (tcsrchr(att1, NSSEP) != 0);
207   if (sep1 != sep2)
208     return sep1 - sep2;
209   return tcscmp(att1, att2);
210 }
211 
212 static void XMLCALL
213 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts)
214 {
215   int nAtts;
216   int nsi;
217   const XML_Char **p;
218   FILE *fp = ((XmlwfUserData *)userData)->fp;
219   const XML_Char *sep;
220   puttc(T('<'), fp);
221 
222   sep = tcsrchr(name, NSSEP);
223   if (sep) {
224     fputts(T("n1:"), fp);
225     fputts(sep + 1, fp);
226     fputts(T(" xmlns:n1"), fp);
227     attributeValue(fp, name);
228     nsi = 2;
229   }
230   else {
231     fputts(name, fp);
232     nsi = 1;
233   }
234 
235   p = atts;
236   while (*p)
237     ++p;
238   nAtts = (int)((p - atts) >> 1);
239   if (nAtts > 1)
240     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
241   while (*atts) {
242     name = *atts++;
243     sep = tcsrchr(name, NSSEP);
244     puttc(T(' '), fp);
245     if (sep) {
246       ftprintf(fp, T("n%d:"), nsi);
247       fputts(sep + 1, fp);
248     }
249     else
250       fputts(name, fp);
251     attributeValue(fp, *atts);
252     if (sep) {
253       ftprintf(fp, T(" xmlns:n%d"), nsi++);
254       attributeValue(fp, name);
255     }
256     atts++;
257   }
258   puttc(T('>'), fp);
259 }
260 
261 static void XMLCALL
262 endElementNS(void *userData, const XML_Char *name)
263 {
264   FILE *fp = ((XmlwfUserData *)userData)->fp;
265   const XML_Char *sep;
266   puttc(T('<'), fp);
267   puttc(T('/'), fp);
268   sep = tcsrchr(name, NSSEP);
269   if (sep) {
270     fputts(T("n1:"), fp);
271     fputts(sep + 1, fp);
272   }
273   else
274     fputts(name, fp);
275   puttc(T('>'), fp);
276 }
277 
278 #ifndef W3C14N
279 
280 static void XMLCALL
281 processingInstruction(void *userData, const XML_Char *target,
282                       const XML_Char *data)
283 {
284   FILE *fp = ((XmlwfUserData *)userData)->fp;
285   puttc(T('<'), fp);
286   puttc(T('?'), fp);
287   fputts(target, fp);
288   puttc(T(' '), fp);
289   fputts(data, fp);
290   puttc(T('?'), fp);
291   puttc(T('>'), fp);
292 }
293 
294 
295 static XML_Char *xcsdup(const XML_Char *s)
296 {
297   XML_Char *result;
298   int count = 0;
299   int numBytes;
300 
301   /* Get the length of the string, including terminator */
302   while (s[count++] != 0) {
303     /* Do nothing */
304   }
305   numBytes = count * sizeof(XML_Char);
306   result = malloc(numBytes);
307   if (result == NULL)
308     return NULL;
309   memcpy(result, s, numBytes);
310   return result;
311 }
312 
313 static void XMLCALL
314 startDoctypeDecl(void *userData,
315                  const XML_Char *doctypeName,
316                  const XML_Char *UNUSED_P(sysid),
317                  const XML_Char *UNUSED_P(publid),
318                  int UNUSED_P(has_internal_subset))
319 {
320   XmlwfUserData *data = (XmlwfUserData *)userData;
321   data->currentDoctypeName = xcsdup(doctypeName);
322 }
323 
324 static void
325 freeNotations(XmlwfUserData *data)
326 {
327   NotationList *notationListHead = data->notationListHead;
328 
329   while (notationListHead != NULL) {
330     NotationList *next = notationListHead->next;
331     free((void *)notationListHead->notationName);
332     free((void *)notationListHead->systemId);
333     free((void *)notationListHead->publicId);
334     free(notationListHead);
335     notationListHead = next;
336   }
337   data->notationListHead = NULL;
338 }
339 
340 static int xcscmp(const XML_Char *xs, const XML_Char *xt)
341 {
342   while (*xs != 0 && *xt != 0) {
343     if (*xs < *xt)
344       return -1;
345     if (*xs > *xt)
346       return 1;
347     xs++;
348     xt++;
349   }
350   if (*xs < *xt)
351     return -1;
352   if (*xs > *xt)
353     return 1;
354   return 0;
355 }
356 
357 static int
358 notationCmp(const void *a, const void *b)
359 {
360   const NotationList * const n1 = *(NotationList **)a;
361   const NotationList * const n2 = *(NotationList **)b;
362 
363   return xcscmp(n1->notationName, n2->notationName);
364 }
365 
366 static void XMLCALL
367 endDoctypeDecl(void *userData)
368 {
369   XmlwfUserData *data = (XmlwfUserData *)userData;
370   NotationList **notations;
371   int notationCount = 0;
372   NotationList *p;
373   int i;
374 
375   /* How many notations do we have? */
376   for (p = data->notationListHead; p != NULL; p = p->next)
377     notationCount++;
378   if (notationCount == 0) {
379     /* Nothing to report */
380     free((void *)data->currentDoctypeName);
381     data->currentDoctypeName = NULL;
382     return;
383   }
384 
385   notations = malloc(notationCount * sizeof(NotationList *));
386   if (notations == NULL) {
387     fprintf(stderr, "Unable to sort notations");
388     freeNotations(data);
389     return;
390   }
391 
392   for (p = data->notationListHead, i = 0;
393        i < notationCount;
394        p = p->next, i++) {
395     notations[i] = p;
396   }
397   qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
398 
399   /* Output the DOCTYPE header */
400   fputts(T("<!DOCTYPE "), data->fp);
401   fputts(data->currentDoctypeName, data->fp);
402   fputts(T(" [\n"), data->fp);
403 
404   /* Now the NOTATIONs */
405   for (i = 0; i < notationCount; i++) {
406     fputts(T("<!NOTATION "), data->fp);
407     fputts(notations[i]->notationName, data->fp);
408     if (notations[i]->publicId != NULL) {
409       fputts(T(" PUBLIC '"), data->fp);
410       fputts(notations[i]->publicId, data->fp);
411       puttc(T('\''), data->fp);
412       if (notations[i]->systemId != NULL) {
413         puttc(T(' '), data->fp);
414         puttc(T('\''), data->fp);
415         fputts(notations[i]->systemId, data->fp);
416         puttc(T('\''), data->fp);
417       }
418     }
419     else if (notations[i]->systemId != NULL) {
420       fputts(T(" SYSTEM '"), data->fp);
421       fputts(notations[i]->systemId, data->fp);
422       puttc(T('\''), data->fp);
423     }
424     puttc(T('>'), data->fp);
425     puttc(T('\n'), data->fp);
426   }
427 
428   /* Finally end the DOCTYPE */
429   fputts(T("]>\n"), data->fp);
430 
431   free(notations);
432   freeNotations(data);
433   free((void *)data->currentDoctypeName);
434   data->currentDoctypeName = NULL;
435 }
436 
437 static void XMLCALL
438 notationDecl(void *userData,
439              const XML_Char *notationName,
440              const XML_Char *UNUSED_P(base),
441              const XML_Char *systemId,
442              const XML_Char *publicId)
443 {
444   XmlwfUserData *data = (XmlwfUserData *)userData;
445   NotationList *entry = malloc(sizeof(NotationList));
446   const char *errorMessage = "Unable to store NOTATION for output\n";
447 
448   if (entry == NULL) {
449     fputs(errorMessage, stderr);
450     return; /* Nothing we can really do about this */
451   }
452   entry->notationName = xcsdup(notationName);
453   if (entry->notationName == NULL) {
454     fputs(errorMessage, stderr);
455     free(entry);
456     return;
457   }
458   if (systemId != NULL) {
459     entry->systemId = xcsdup(systemId);
460     if (entry->systemId == NULL) {
461       fputs(errorMessage, stderr);
462       free((void *)entry->notationName);
463       free(entry);
464       return;
465     }
466   }
467   else {
468     entry->systemId = NULL;
469   }
470   if (publicId != NULL) {
471     entry->publicId = xcsdup(publicId);
472     if (entry->publicId == NULL) {
473       fputs(errorMessage, stderr);
474       free((void *)entry->systemId); /* Safe if it's NULL */
475       free((void *)entry->notationName);
476       free(entry);
477       return;
478     }
479   }
480   else {
481     entry->publicId = NULL;
482   }
483 
484   entry->next = data->notationListHead;
485   data->notationListHead = entry;
486 }
487 
488 #endif /* not W3C14N */
489 
490 static void XMLCALL
491 defaultCharacterData(void *userData, const XML_Char *UNUSED_P(s), int UNUSED_P(len))
492 {
493   XML_DefaultCurrent((XML_Parser) userData);
494 }
495 
496 static void XMLCALL
497 defaultStartElement(void *userData, const XML_Char *UNUSED_P(name),
498                     const XML_Char **UNUSED_P(atts))
499 {
500   XML_DefaultCurrent((XML_Parser) userData);
501 }
502 
503 static void XMLCALL
504 defaultEndElement(void *userData, const XML_Char *UNUSED_P(name))
505 {
506   XML_DefaultCurrent((XML_Parser) userData);
507 }
508 
509 static void XMLCALL
510 defaultProcessingInstruction(void *userData, const XML_Char *UNUSED_P(target),
511                              const XML_Char *UNUSED_P(data))
512 {
513   XML_DefaultCurrent((XML_Parser) userData);
514 }
515 
516 static void XMLCALL
517 nopCharacterData(void *UNUSED_P(userData), const XML_Char *UNUSED_P(s), int UNUSED_P(len))
518 {
519 }
520 
521 static void XMLCALL
522 nopStartElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
523 {
524 }
525 
526 static void XMLCALL
527 nopEndElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name))
528 {
529 }
530 
531 static void XMLCALL
532 nopProcessingInstruction(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target),
533                          const XML_Char *UNUSED_P(data))
534 {
535 }
536 
537 static void XMLCALL
538 markup(void *userData, const XML_Char *s, int len)
539 {
540   FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp;
541   for (; len > 0; --len, ++s)
542     puttc(*s, fp);
543 }
544 
545 static void
546 metaLocation(XML_Parser parser)
547 {
548   const XML_Char *uri = XML_GetBase(parser);
549   FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
550   if (uri)
551     ftprintf(fp, T(" uri=\"%s\""), uri);
552   ftprintf(fp,
553            T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"")
554              T(" nbytes=\"%d\"")
555              T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"")
556              T(" col=\"%") T(XML_FMT_INT_MOD) T("u\""),
557            XML_GetCurrentByteIndex(parser),
558            XML_GetCurrentByteCount(parser),
559            XML_GetCurrentLineNumber(parser),
560            XML_GetCurrentColumnNumber(parser));
561 }
562 
563 static void
564 metaStartDocument(void *userData)
565 {
566   fputts(T("<document>\n"),
567          ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp);
568 }
569 
570 static void
571 metaEndDocument(void *userData)
572 {
573   fputts(T("</document>\n"),
574          ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp);
575 }
576 
577 static void XMLCALL
578 metaStartElement(void *userData, const XML_Char *name,
579                  const XML_Char **atts)
580 {
581   XML_Parser parser = (XML_Parser) userData;
582   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
583   FILE *fp = data->fp;
584   const XML_Char **specifiedAttsEnd
585     = atts + XML_GetSpecifiedAttributeCount(parser);
586   const XML_Char **idAttPtr;
587   int idAttIndex = XML_GetIdAttributeIndex(parser);
588   if (idAttIndex < 0)
589     idAttPtr = 0;
590   else
591     idAttPtr = atts + idAttIndex;
592 
593   ftprintf(fp, T("<starttag name=\"%s\""), name);
594   metaLocation(parser);
595   if (*atts) {
596     fputts(T(">\n"), fp);
597     do {
598       ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
599       characterData(data, atts[1], (int)tcslen(atts[1]));
600       if (atts >= specifiedAttsEnd)
601         fputts(T("\" defaulted=\"yes\"/>\n"), fp);
602       else if (atts == idAttPtr)
603         fputts(T("\" id=\"yes\"/>\n"), fp);
604       else
605         fputts(T("\"/>\n"), fp);
606     } while (*(atts += 2));
607     fputts(T("</starttag>\n"), fp);
608   }
609   else
610     fputts(T("/>\n"), fp);
611 }
612 
613 static void XMLCALL
614 metaEndElement(void *userData, const XML_Char *name)
615 {
616   XML_Parser parser = (XML_Parser) userData;
617   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
618   FILE *fp = data->fp;
619   ftprintf(fp, T("<endtag name=\"%s\""), name);
620   metaLocation(parser);
621   fputts(T("/>\n"), fp);
622 }
623 
624 static void XMLCALL
625 metaProcessingInstruction(void *userData, const XML_Char *target,
626                           const XML_Char *data)
627 {
628   XML_Parser parser = (XML_Parser) userData;
629   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
630   FILE *fp = usrData->fp;
631   ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
632   characterData(usrData, data, (int)tcslen(data));
633   puttc(T('"'), fp);
634   metaLocation(parser);
635   fputts(T("/>\n"), fp);
636 }
637 
638 static void XMLCALL
639 metaComment(void *userData, const XML_Char *data)
640 {
641   XML_Parser parser = (XML_Parser) userData;
642   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
643   FILE *fp = usrData->fp;
644   fputts(T("<comment data=\""), fp);
645   characterData(usrData, data, (int)tcslen(data));
646   puttc(T('"'), fp);
647   metaLocation(parser);
648   fputts(T("/>\n"), fp);
649 }
650 
651 static void XMLCALL
652 metaStartCdataSection(void *userData)
653 {
654   XML_Parser parser = (XML_Parser) userData;
655   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
656   FILE *fp = data->fp;
657   fputts(T("<startcdata"), fp);
658   metaLocation(parser);
659   fputts(T("/>\n"), fp);
660 }
661 
662 static void XMLCALL
663 metaEndCdataSection(void *userData)
664 {
665   XML_Parser parser = (XML_Parser) userData;
666   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
667   FILE *fp = data->fp;
668   fputts(T("<endcdata"), fp);
669   metaLocation(parser);
670   fputts(T("/>\n"), fp);
671 }
672 
673 static void XMLCALL
674 metaCharacterData(void *userData, const XML_Char *s, int len)
675 {
676   XML_Parser parser = (XML_Parser) userData;
677   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
678   FILE *fp = data->fp;
679   fputts(T("<chars str=\""), fp);
680   characterData(data, s, len);
681   puttc(T('"'), fp);
682   metaLocation(parser);
683   fputts(T("/>\n"), fp);
684 }
685 
686 static void XMLCALL
687 metaStartDoctypeDecl(void *userData,
688                      const XML_Char *doctypeName,
689                      const XML_Char *UNUSED_P(sysid),
690                      const XML_Char *UNUSED_P(pubid),
691                      int UNUSED_P(has_internal_subset))
692 {
693   XML_Parser parser = (XML_Parser) userData;
694   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
695   FILE *fp = data->fp;
696   ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
697   metaLocation(parser);
698   fputts(T("/>\n"), fp);
699 }
700 
701 static void XMLCALL
702 metaEndDoctypeDecl(void *userData)
703 {
704   XML_Parser parser = (XML_Parser) userData;
705   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
706   FILE *fp = data->fp;
707   fputts(T("<enddoctype"), fp);
708   metaLocation(parser);
709   fputts(T("/>\n"), fp);
710 }
711 
712 static void XMLCALL
713 metaNotationDecl(void *userData,
714                  const XML_Char *notationName,
715                  const XML_Char *UNUSED_P(base),
716                  const XML_Char *systemId,
717                  const XML_Char *publicId)
718 {
719   XML_Parser parser = (XML_Parser) userData;
720   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
721   FILE *fp = data->fp;
722   ftprintf(fp, T("<notation name=\"%s\""), notationName);
723   if (publicId)
724     ftprintf(fp, T(" public=\"%s\""), publicId);
725   if (systemId) {
726     fputts(T(" system=\""), fp);
727     characterData(data, systemId, (int)tcslen(systemId));
728     puttc(T('"'), fp);
729   }
730   metaLocation(parser);
731   fputts(T("/>\n"), fp);
732 }
733 
734 
735 static void XMLCALL
736 metaEntityDecl(void *userData,
737                const XML_Char *entityName,
738                int  UNUSED_P(is_param),
739                const XML_Char *value,
740                int  value_length,
741                const XML_Char *UNUSED_P(base),
742                const XML_Char *systemId,
743                const XML_Char *publicId,
744                const XML_Char *notationName)
745 {
746   XML_Parser parser = (XML_Parser) userData;
747   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
748   FILE *fp = data->fp;
749 
750   if (value) {
751     ftprintf(fp, T("<entity name=\"%s\""), entityName);
752     metaLocation(parser);
753     puttc(T('>'), fp);
754     characterData(data, value, value_length);
755     fputts(T("</entity/>\n"), fp);
756   }
757   else if (notationName) {
758     ftprintf(fp, T("<entity name=\"%s\""), entityName);
759     if (publicId)
760       ftprintf(fp, T(" public=\"%s\""), publicId);
761     fputts(T(" system=\""), fp);
762     characterData(data, systemId, (int)tcslen(systemId));
763     puttc(T('"'), fp);
764     ftprintf(fp, T(" notation=\"%s\""), notationName);
765     metaLocation(parser);
766     fputts(T("/>\n"), fp);
767   }
768   else {
769     ftprintf(fp, T("<entity name=\"%s\""), entityName);
770     if (publicId)
771       ftprintf(fp, T(" public=\"%s\""), publicId);
772     fputts(T(" system=\""), fp);
773     characterData(data, systemId, (int)tcslen(systemId));
774     puttc(T('"'), fp);
775     metaLocation(parser);
776     fputts(T("/>\n"), fp);
777   }
778 }
779 
780 static void XMLCALL
781 metaStartNamespaceDecl(void *userData,
782                        const XML_Char *prefix,
783                        const XML_Char *uri)
784 {
785   XML_Parser parser = (XML_Parser) userData;
786   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
787   FILE *fp = data->fp;
788   fputts(T("<startns"), fp);
789   if (prefix)
790     ftprintf(fp, T(" prefix=\"%s\""), prefix);
791   if (uri) {
792     fputts(T(" ns=\""), fp);
793     characterData(data, uri, (int)tcslen(uri));
794     fputts(T("\"/>\n"), fp);
795   }
796   else
797     fputts(T("/>\n"), fp);
798 }
799 
800 static void XMLCALL
801 metaEndNamespaceDecl(void *userData, const XML_Char *prefix)
802 {
803   XML_Parser parser = (XML_Parser) userData;
804   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
805   FILE *fp = data->fp;
806   if (!prefix)
807     fputts(T("<endns/>\n"), fp);
808   else
809     ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
810 }
811 
812 static int XMLCALL
813 unknownEncodingConvert(void *data, const char *p)
814 {
815   return codepageConvert(*(int *)data, p);
816 }
817 
818 static int XMLCALL
819 unknownEncoding(void *UNUSED_P(userData), const XML_Char *name, XML_Encoding *info)
820 {
821   int cp;
822   static const XML_Char prefixL[] = T("windows-");
823   static const XML_Char prefixU[] = T("WINDOWS-");
824   int i;
825 
826   for (i = 0; prefixU[i]; i++)
827     if (name[i] != prefixU[i] && name[i] != prefixL[i])
828       return 0;
829 
830   cp = 0;
831   for (; name[i]; i++) {
832     static const XML_Char digits[] = T("0123456789");
833     const XML_Char *s = tcschr(digits, name[i]);
834     if (!s)
835       return 0;
836     cp *= 10;
837     cp += (int)(s - digits);
838     if (cp >= 0x10000)
839       return 0;
840   }
841   if (!codepageMap(cp, info->map))
842     return 0;
843   info->convert = unknownEncodingConvert;
844   /* We could just cast the code page integer to a void *,
845   and avoid the use of release. */
846   info->release = free;
847   info->data = malloc(sizeof(int));
848   if (!info->data)
849     return 0;
850   *(int *)info->data = cp;
851   return 1;
852 }
853 
854 static int XMLCALL
855 notStandalone(void *UNUSED_P(userData))
856 {
857   return 0;
858 }
859 
860 static void
861 showVersion(XML_Char *prog)
862 {
863   XML_Char *s = prog;
864   XML_Char ch;
865   const XML_Feature *features = XML_GetFeatureList();
866   while ((ch = *s) != 0) {
867     if (ch == '/'
868 #if defined(_WIN32)
869         || ch == '\\'
870 #endif
871         )
872       prog = s + 1;
873     ++s;
874   }
875   ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
876   if (features != NULL && features[0].feature != XML_FEATURE_END) {
877     int i = 1;
878     ftprintf(stdout, T("%s"), features[0].name);
879     if (features[0].value)
880       ftprintf(stdout, T("=%ld"), features[0].value);
881     while (features[i].feature != XML_FEATURE_END) {
882       ftprintf(stdout, T(", %s"), features[i].name);
883       if (features[i].value)
884         ftprintf(stdout, T("=%ld"), features[i].value);
885       ++i;
886     }
887     ftprintf(stdout, T("\n"));
888   }
889 }
890 
891 static void
892 usage(const XML_Char *prog, int rc)
893 {
894   ftprintf(stderr,
895            T("usage: %s [-s] [-n] [-p] [-x] [-e encoding] [-w] [-d output-dir] [-c] [-m] [-r] [-t] [-N] [file ...]\n"), prog);
896   exit(rc);
897 }
898 
899 #if defined(__MINGW32__) && defined(XML_UNICODE)
900 /* Silence warning about missing prototype */
901 int wmain(int argc, XML_Char **argv);
902 #endif
903 
904 int
905 tmain(int argc, XML_Char **argv)
906 {
907   int i, j;
908   const XML_Char *outputDir = NULL;
909   const XML_Char *encoding = NULL;
910   unsigned processFlags = XML_MAP_FILE;
911   int windowsCodePages = 0;
912   int outputType = 0;
913   int useNamespaces = 0;
914   int requireStandalone = 0;
915   int requiresNotations = 0;
916   enum XML_ParamEntityParsing paramEntityParsing =
917     XML_PARAM_ENTITY_PARSING_NEVER;
918   int useStdin = 0;
919   XmlwfUserData userData = { NULL, NULL, NULL };
920 
921 #ifdef _MSC_VER
922   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
923 #endif
924 
925   i = 1;
926   j = 0;
927   while (i < argc) {
928     if (j == 0) {
929       if (argv[i][0] != T('-'))
930         break;
931       if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
932         i++;
933         break;
934       }
935       j++;
936     }
937     switch (argv[i][j]) {
938     case T('r'):
939       processFlags &= ~XML_MAP_FILE;
940       j++;
941       break;
942     case T('s'):
943       requireStandalone = 1;
944       j++;
945       break;
946     case T('n'):
947       useNamespaces = 1;
948       j++;
949       break;
950     case T('p'):
951       paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
952       /* fall through */
953     case T('x'):
954       processFlags |= XML_EXTERNAL_ENTITIES;
955       j++;
956       break;
957     case T('w'):
958       windowsCodePages = 1;
959       j++;
960       break;
961     case T('m'):
962       outputType = 'm';
963       j++;
964       break;
965     case T('c'):
966       outputType = 'c';
967       useNamespaces = 0;
968       j++;
969       break;
970     case T('t'):
971       outputType = 't';
972       j++;
973       break;
974     case T('N'):
975       requiresNotations = 1;
976       j++;
977       break;
978     case T('d'):
979       if (argv[i][j + 1] == T('\0')) {
980         if (++i == argc)
981           usage(argv[0], 2);
982         outputDir = argv[i];
983       }
984       else
985         outputDir = argv[i] + j + 1;
986       i++;
987       j = 0;
988       break;
989     case T('e'):
990       if (argv[i][j + 1] == T('\0')) {
991         if (++i == argc)
992           usage(argv[0], 2);
993         encoding = argv[i];
994       }
995       else
996         encoding = argv[i] + j + 1;
997       i++;
998       j = 0;
999       break;
1000     case T('h'):
1001       usage(argv[0], 0);
1002       return 0;
1003     case T('v'):
1004       showVersion(argv[0]);
1005       return 0;
1006     case T('\0'):
1007       if (j > 1) {
1008         i++;
1009         j = 0;
1010         break;
1011       }
1012       /* fall through */
1013     default:
1014       usage(argv[0], 2);
1015     }
1016   }
1017   if (i == argc) {
1018     useStdin = 1;
1019     processFlags &= ~XML_MAP_FILE;
1020     i--;
1021   }
1022   for (; i < argc; i++) {
1023     XML_Char *outName = 0;
1024     int result;
1025     XML_Parser parser;
1026     if (useNamespaces)
1027       parser = XML_ParserCreateNS(encoding, NSSEP);
1028     else
1029       parser = XML_ParserCreate(encoding);
1030 
1031     if (! parser) {
1032       tperror(T("Could not instantiate parser"));
1033       exit(1);
1034     }
1035 
1036     if (requireStandalone)
1037       XML_SetNotStandaloneHandler(parser, notStandalone);
1038     XML_SetParamEntityParsing(parser, paramEntityParsing);
1039     if (outputType == 't') {
1040       /* This is for doing timings; this gives a more realistic estimate of
1041          the parsing time. */
1042       outputDir = 0;
1043       XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1044       XML_SetCharacterDataHandler(parser, nopCharacterData);
1045       XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1046     }
1047     else if (outputDir) {
1048       const XML_Char * delim = T("/");
1049       const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1050       if (!useStdin) {
1051         /* Jump after last (back)slash */
1052         const XML_Char * lastDelim = tcsrchr(file, delim[0]);
1053         if (lastDelim)
1054           file = lastDelim + 1;
1055 #if defined(_WIN32)
1056         else {
1057           const XML_Char * winDelim = T("\\");
1058           lastDelim = tcsrchr(file, winDelim[0]);
1059           if (lastDelim) {
1060             file = lastDelim + 1;
1061             delim = winDelim;
1062           }
1063         }
1064 #endif
1065       }
1066       outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
1067                        * sizeof(XML_Char));
1068       tcscpy(outName, outputDir);
1069       tcscat(outName, delim);
1070       tcscat(outName, file);
1071       userData.fp = tfopen(outName, T("wb"));
1072       if (!userData.fp) {
1073         tperror(outName);
1074         exit(1);
1075       }
1076       setvbuf(userData.fp, NULL, _IOFBF, 16384);
1077 #ifdef XML_UNICODE
1078       puttc(0xFEFF, userData.fp);
1079 #endif
1080       XML_SetUserData(parser, &userData);
1081       switch (outputType) {
1082       case 'm':
1083         XML_UseParserAsHandlerArg(parser);
1084         XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1085         XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1086         XML_SetCommentHandler(parser, metaComment);
1087         XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1088                                    metaEndCdataSection);
1089         XML_SetCharacterDataHandler(parser, metaCharacterData);
1090         XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1091                                   metaEndDoctypeDecl);
1092         XML_SetEntityDeclHandler(parser, metaEntityDecl);
1093         XML_SetNotationDeclHandler(parser, metaNotationDecl);
1094         XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1095                                     metaEndNamespaceDecl);
1096         metaStartDocument(parser);
1097         break;
1098       case 'c':
1099         XML_UseParserAsHandlerArg(parser);
1100         XML_SetDefaultHandler(parser, markup);
1101         XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1102         XML_SetCharacterDataHandler(parser, defaultCharacterData);
1103         XML_SetProcessingInstructionHandler(parser,
1104                                             defaultProcessingInstruction);
1105         break;
1106       default:
1107         if (useNamespaces)
1108           XML_SetElementHandler(parser, startElementNS, endElementNS);
1109         else
1110           XML_SetElementHandler(parser, startElement, endElement);
1111         XML_SetCharacterDataHandler(parser, characterData);
1112 #ifndef W3C14N
1113         XML_SetProcessingInstructionHandler(parser, processingInstruction);
1114         if (requiresNotations) {
1115           XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1116           XML_SetNotationDeclHandler(parser, notationDecl);
1117         }
1118 #endif /* not W3C14N */
1119         break;
1120       }
1121     }
1122     if (windowsCodePages)
1123       XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1124     result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1125     if (outputDir) {
1126       if (outputType == 'm')
1127         metaEndDocument(parser);
1128       fclose(userData.fp);
1129       if (!result) {
1130         tremove(outName);
1131         exit(2);
1132       }
1133       free(outName);
1134     }
1135     XML_ParserFree(parser);
1136   }
1137   return 0;
1138 }
1139