xref: /freebsd/contrib/expat/tests/misc_tests.c (revision 9cc9b8b372842b9a941d235c5e9949a214e5284f)
1 /* Tests in the "miscellaneous" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
22    Copyright (c) 2025      Berkay Eren Ürün <berkay.ueruen@siemens.com>
23    Copyright (c) 2026      Matthew Fernandez <matthew.fernandez@gmail.com>
24    Licensed under the MIT license:
25 
26    Permission is  hereby granted,  free of charge,  to any  person obtaining
27    a  copy  of  this  software   and  associated  documentation  files  (the
28    "Software"),  to  deal in  the  Software  without restriction,  including
29    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
30    distribute, sublicense, and/or sell copies of the Software, and to permit
31    persons  to whom  the Software  is  furnished to  do so,  subject to  the
32    following conditions:
33 
34    The above copyright  notice and this permission notice  shall be included
35    in all copies or substantial portions of the Software.
36 
37    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
38    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
39    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
40    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
41    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
42    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
43    USE OR OTHER DEALINGS IN THE SOFTWARE.
44 */
45 
46 #if defined(NDEBUG)
47 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
48 #endif
49 
50 #include <assert.h>
51 #include <string.h>
52 
53 #include "expat_config.h"
54 
55 #include "expat.h"
56 #include "internal.h"
57 #include "minicheck.h"
58 #include "memcheck.h"
59 #include "common.h"
60 #include "ascii.h" /* for ASCII_xxx */
61 #include "handlers.h"
62 #include "misc_tests.h"
63 
64 void XMLCALL accumulate_characters_ext_handler(void *userData,
65                                                const XML_Char *s, int len);
66 
67 /* Test that a failure to allocate the parser structure fails gracefully */
START_TEST(test_misc_alloc_create_parser)68 START_TEST(test_misc_alloc_create_parser) {
69   XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
70   unsigned int i;
71   const unsigned int max_alloc_count = 10;
72 
73   /* Something this simple shouldn't need more than 10 allocations */
74   for (i = 0; i < max_alloc_count; i++) {
75     g_allocation_count = (int)i;
76     g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL);
77     if (g_parser != NULL)
78       break;
79   }
80   if (i == 0)
81     fail("Parser unexpectedly ignored failing allocator");
82   else if (i == max_alloc_count)
83     fail("Parser not created with max allocation count");
84 }
85 END_TEST
86 
87 /* Test memory allocation failures for a parser with an encoding */
START_TEST(test_misc_alloc_create_parser_with_encoding)88 START_TEST(test_misc_alloc_create_parser_with_encoding) {
89   XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
90   unsigned int i;
91   const unsigned int max_alloc_count = 10;
92 
93   /* Try several levels of allocation */
94   for (i = 0; i < max_alloc_count; i++) {
95     g_allocation_count = (int)i;
96     g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL);
97     if (g_parser != NULL)
98       break;
99   }
100   if (i == 0)
101     fail("Parser ignored failing allocator");
102   else if (i == max_alloc_count)
103     fail("Parser not created with max allocation count");
104 }
105 END_TEST
106 
107 /* Test that freeing a NULL parser doesn't cause an explosion.
108  * (Not actually tested anywhere else)
109  */
START_TEST(test_misc_null_parser)110 START_TEST(test_misc_null_parser) {
111   XML_ParserFree(NULL);
112 }
113 END_TEST
114 
/* EXPAT_TESTS_UBSAN is 1 when the compiler reports (via __has_feature) that
 * the undefined behavior sanitizer is active, and 0 in every other case. */
#if defined(__has_feature)
#  if __has_feature(undefined_behavior_sanitizer)
#    define EXPAT_TESTS_UBSAN 1
#  endif
#endif
#if ! defined(EXPAT_TESTS_UBSAN)
#  define EXPAT_TESTS_UBSAN 0
#endif
124 
125 /* Test that XML_ErrorString rejects out-of-range codes */
START_TEST(test_misc_error_string)126 START_TEST(test_misc_error_string) {
127 #if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan
128   union {
129     enum XML_Error xml_error;
130     int integer;
131   } trickery;
132 
133   assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test
134 
135   trickery.integer = -1;
136   if (XML_ErrorString(trickery.xml_error) != NULL)
137     fail("Negative error code not rejected");
138 
139   trickery.integer = 100;
140   if (XML_ErrorString(trickery.xml_error) != NULL)
141     fail("Large error code not rejected");
142 #endif
143 }
144 END_TEST
145 
146 /* Test the version information is consistent */
147 
148 /* Since we are working in XML_LChars (potentially 16-bits), we
149  * can't use the standard C library functions for character
150  * manipulation and have to roll our own.
151  */
152 static int
parse_version(const XML_LChar * version_text,XML_Expat_Version * version_struct)153 parse_version(const XML_LChar *version_text,
154               XML_Expat_Version *version_struct) {
155   if (! version_text)
156     return XML_FALSE;
157 
158   while (*version_text != 0x00) {
159     if (*version_text >= ASCII_0 && *version_text <= ASCII_9)
160       break;
161     version_text++;
162   }
163   if (*version_text == 0x00)
164     return XML_FALSE;
165 
166   /* version_struct->major = strtoul(version_text, 10, &version_text) */
167   version_struct->major = 0;
168   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
169     version_struct->major
170         = 10 * version_struct->major + (*version_text++ - ASCII_0);
171   }
172   if (*version_text++ != ASCII_PERIOD)
173     return XML_FALSE;
174 
175   /* Now for the minor version number */
176   version_struct->minor = 0;
177   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
178     version_struct->minor
179         = 10 * version_struct->minor + (*version_text++ - ASCII_0);
180   }
181   if (*version_text++ != ASCII_PERIOD)
182     return XML_FALSE;
183 
184   /* Finally the micro version number */
185   version_struct->micro = 0;
186   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
187     version_struct->micro
188         = 10 * version_struct->micro + (*version_text++ - ASCII_0);
189   }
190   if (*version_text != 0x00)
191     return XML_FALSE;
192   return XML_TRUE;
193 }
194 
195 static int
versions_equal(const XML_Expat_Version * first,const XML_Expat_Version * second)196 versions_equal(const XML_Expat_Version *first,
197                const XML_Expat_Version *second) {
198   return (first->major == second->major && first->minor == second->minor
199           && first->micro == second->micro);
200 }
201 
START_TEST(test_misc_version)202 START_TEST(test_misc_version) {
203   XML_Expat_Version read_version = XML_ExpatVersionInfo();
204   /* Silence compiler warning with the following assignment */
205   XML_Expat_Version parsed_version = {0, 0, 0};
206   const XML_LChar *version_text = XML_ExpatVersion();
207 
208   if (version_text == NULL)
209     fail("Could not obtain version text");
210   assert(version_text != NULL);
211   if (! parse_version(version_text, &parsed_version))
212     fail("Unable to parse version text");
213   if (! versions_equal(&read_version, &parsed_version))
214     fail("Version mismatch");
215 
216   if (xcstrcmp(version_text, XCS("expat_2.8.1"))
217       != 0) /* needs bump on releases */
218     fail("XML_*_VERSION in expat.h out of sync?\n");
219 }
220 END_TEST
221 
222 /* Test feature information */
START_TEST(test_misc_features)223 START_TEST(test_misc_features) {
224   const XML_Feature *features = XML_GetFeatureList();
225 
226   /* Prevent problems with double-freeing parsers */
227   g_parser = NULL;
228   if (features == NULL) {
229     fail("Failed to get feature information");
230   } else {
231     /* Loop through the features checking what we can */
232     while (features->feature != XML_FEATURE_END) {
233       switch (features->feature) {
234       case XML_FEATURE_SIZEOF_XML_CHAR:
235         if (features->value != sizeof(XML_Char))
236           fail("Incorrect size of XML_Char");
237         break;
238       case XML_FEATURE_SIZEOF_XML_LCHAR:
239         if (features->value != sizeof(XML_LChar))
240           fail("Incorrect size of XML_LChar");
241         break;
242       default:
243         break;
244       }
245       features++;
246     }
247   }
248 }
249 END_TEST
250 
251 /* Regression test for GitHub Issue #17: memory leak parsing attribute
252  * values with mixed bound and unbound namespaces.
253  */
START_TEST(test_misc_attribute_leak)254 START_TEST(test_misc_attribute_leak) {
255   const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>";
256   XML_Memory_Handling_Suite memsuite
257       = {tracking_malloc, tracking_realloc, tracking_free};
258 
259   g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n"));
260   expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found");
261   XML_ParserFree(g_parser);
262   /* Prevent the teardown trying to double free */
263   g_parser = NULL;
264 
265   if (! tracking_report())
266     fail("Memory leak found");
267 }
268 END_TEST
269 
270 /* Test parser created for UTF-16LE is successful */
START_TEST(test_misc_utf16le)271 START_TEST(test_misc_utf16le) {
272   const char text[] =
273       /* <?xml version='1.0'?><q>Hi</q> */
274       "<\0?\0x\0m\0l\0 \0"
275       "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0"
276       "<\0q\0>\0H\0i\0<\0/\0q\0>\0";
277   const XML_Char *expected = XCS("Hi");
278   CharData storage;
279 
280   g_parser = XML_ParserCreate(XCS("UTF-16LE"));
281   if (g_parser == NULL)
282     fail("Parser not created");
283 
284   CharData_Init(&storage);
285   XML_SetUserData(g_parser, &storage);
286   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
287   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
288       == XML_STATUS_ERROR)
289     xml_failure(g_parser);
290   CharData_CheckXMLChars(&storage, expected);
291 }
292 END_TEST
293 
START_TEST(test_misc_stop_during_end_handler_issue_240_1)294 START_TEST(test_misc_stop_during_end_handler_issue_240_1) {
295   XML_Parser parser;
296   DataIssue240 *mydata;
297   enum XML_Status result;
298   const char *const doc1 = "<doc><e1/><e><foo/></e></doc>";
299 
300   parser = XML_ParserCreate(NULL);
301   XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
302   mydata = malloc(sizeof(DataIssue240));
303   assert_true(mydata != NULL);
304   mydata->parser = parser;
305   mydata->deep = 0;
306   XML_SetUserData(parser, mydata);
307 
308   result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1);
309   XML_ParserFree(parser);
310   free(mydata);
311   if (result != XML_STATUS_ERROR)
312     fail("Stopping the parser did not work as expected");
313 }
314 END_TEST
315 
START_TEST(test_misc_stop_during_end_handler_issue_240_2)316 START_TEST(test_misc_stop_during_end_handler_issue_240_2) {
317   XML_Parser parser;
318   DataIssue240 *mydata;
319   enum XML_Status result;
320   const char *const doc2 = "<doc><elem/></doc>";
321 
322   parser = XML_ParserCreate(NULL);
323   XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
324   mydata = malloc(sizeof(DataIssue240));
325   assert_true(mydata != NULL);
326   mydata->parser = parser;
327   mydata->deep = 0;
328   XML_SetUserData(parser, mydata);
329 
330   result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1);
331   XML_ParserFree(parser);
332   free(mydata);
333   if (result != XML_STATUS_ERROR)
334     fail("Stopping the parser did not work as expected");
335 }
336 END_TEST
337 
START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317)338 START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
339   const char *const inputOne
340       = "<!DOCTYPE d [\n"
341         "<!ENTITY % element_d '<!ELEMENT d (#PCDATA)*>'>\n"
342         "%element_d;\n"
343         "<!ENTITY % e ']><d/>'>\n"
344         "\n"
345         "%e;";
346   const char *const inputTwo
347       = "<!DOCTYPE d [\n"
348         "<!ENTITY % element_d '<!ELEMENT d (#PCDATA)*>'>\n"
349         "%element_d;\n"
350         "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '&#37;e1;'>\n"
351         "\n"
352         "%e2;";
353   const char *const inputThree
354       = "<!DOCTYPE d [\n"
355         "<!ENTITY % element_d '<!ELEMENT d (#PCDATA)*>'>\n"
356         "%element_d;\n"
357         "<!ENTITY % e ']><d'>\n"
358         "\n"
359         "%e;/>";
360   const char *const inputIssue317
361       = "<!DOCTYPE doc [\n"
362         "<!ENTITY % element_doc '<!ELEMENT doc (#PCDATA)*>'>\n"
363         "%element_doc;\n"
364         "<!ENTITY % foo ']>\n"
365         "<doc>Hell<oc (#PCDATA)*>'>\n"
366         "%foo;\n"
367         "]>\n"
368         "<doc>Hello, world</dVc>";
369 
370   const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
371   const XML_Bool suspendOrNot[] = {XML_FALSE, XML_TRUE};
372   size_t inputIndex = 0;
373 
374   for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
375     for (size_t suspendOrNotIndex = 0;
376          suspendOrNotIndex < sizeof(suspendOrNot) / sizeof(suspendOrNot[0]);
377          suspendOrNotIndex++) {
378       const char *const input = inputs[inputIndex];
379       const XML_Bool suspend = suspendOrNot[suspendOrNotIndex];
380       if (suspend && (g_chunkSize > 0)) {
381         // We cannot use _XML_Parse_SINGLE_BYTES below due to suspension, and
382         // so chunk sizes >0 would only repeat the very same test
383         // due to use of plain XML_Parse; we are saving upon that runtime:
384         return;
385       }
386 
387       set_subtest("[input=%d suspend=%s] %s", (int)inputIndex,
388                   suspend ? "true" : "false", input);
389       XML_Parser parser;
390       enum XML_Status parseResult;
391       int setParamEntityResult;
392       XML_Size lineNumber;
393       XML_Size columnNumber;
394 
395       parser = XML_ParserCreate(NULL);
396       setParamEntityResult
397           = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
398       if (setParamEntityResult != 1)
399         fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
400 
401       if (suspend) {
402         XML_SetUserData(parser, parser);
403         XML_SetElementDeclHandler(parser, suspend_after_element_declaration);
404       }
405 
406       if (suspend) {
407         // can't use SINGLE_BYTES here, because it'll return early on
408         // suspension, and we won't know exactly how much input we actually
409         // managed to give Expat.
410         parseResult = XML_Parse(parser, input, (int)strlen(input), 0);
411 
412         while (parseResult == XML_STATUS_SUSPENDED) {
413           parseResult = XML_ResumeParser(parser);
414         }
415 
416         if (parseResult != XML_STATUS_ERROR) {
417           // can't use SINGLE_BYTES here, because it'll return early on
418           // suspension, and we won't know exactly how much input we actually
419           // managed to give Expat.
420           parseResult = XML_Parse(parser, "", 0, 1);
421         }
422 
423         while (parseResult == XML_STATUS_SUSPENDED) {
424           parseResult = XML_ResumeParser(parser);
425         }
426       } else {
427         parseResult
428             = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
429 
430         if (parseResult != XML_STATUS_ERROR) {
431           parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
432         }
433       }
434 
435       if (parseResult != XML_STATUS_ERROR) {
436         fail("Parsing was expected to fail but succeeded.");
437       }
438 
439       if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
440         fail("Error code does not match XML_ERROR_INVALID_TOKEN");
441 
442       lineNumber = XML_GetCurrentLineNumber(parser);
443       if (lineNumber != 6)
444         fail("XML_GetCurrentLineNumber does not work as expected.");
445 
446       columnNumber = XML_GetCurrentColumnNumber(parser);
447       if (columnNumber != 0)
448         fail("XML_GetCurrentColumnNumber does not work as expected.");
449 
450       XML_ParserFree(parser);
451     }
452   }
453 }
454 END_TEST
455 
START_TEST(test_misc_tag_mismatch_reset_leak)456 START_TEST(test_misc_tag_mismatch_reset_leak) {
457 #ifdef XML_NS
458   const char *const text = "<open xmlns='https://namespace1.test'></close>";
459   XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n'));
460 
461   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
462       != XML_STATUS_ERROR)
463     fail("Call to parse was expected to fail");
464   if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
465     fail("Call to parse was expected to fail from a closing tag mismatch");
466 
467   XML_ParserReset(parser, NULL);
468 
469   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
470       != XML_STATUS_ERROR)
471     fail("Call to parse was expected to fail");
472   if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
473     fail("Call to parse was expected to fail from a closing tag mismatch");
474 
475   XML_ParserFree(parser);
476 #endif
477 }
478 END_TEST
479 
START_TEST(test_misc_create_external_entity_parser_with_null_context)480 START_TEST(test_misc_create_external_entity_parser_with_null_context) {
481   // With XML_DTD undefined, the only supported case of external entities
482   // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context
483   // was causing a segfault through a null pointer dereference in function
484   // setContext, previously.
485   XML_Parser parser = XML_ParserCreate(NULL);
486   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
487 #ifdef XML_DTD
488   assert_true(ext_parser != NULL);
489   XML_ParserFree(ext_parser);
490 #else
491   assert_true(ext_parser == NULL);
492 #endif /* XML_DTD */
493   XML_ParserFree(parser);
494 }
495 END_TEST
496 
START_TEST(test_misc_general_entities_support)497 START_TEST(test_misc_general_entities_support) {
498   const char *const doc
499       = "<!DOCTYPE r [\n"
500         "<!ENTITY e1 'v1'>\n"
501         "<!ENTITY e2 SYSTEM 'v2'>\n"
502         "]>\n"
503         "<r a1='[&e1;]'>[&e1;][&e2;][&amp;&apos;&gt;&lt;&quot;]</r>";
504 
505   CharData storage;
506   CharData_Init(&storage);
507 
508   XML_Parser parser = XML_ParserCreate(NULL);
509   XML_SetUserData(parser, &storage);
510   XML_SetStartElementHandler(parser, accumulate_start_element);
511   XML_SetExternalEntityRefHandler(parser,
512                                   external_entity_failer__if_not_xml_ge);
513   XML_SetEntityDeclHandler(parser, accumulate_entity_decl);
514   XML_SetCharacterDataHandler(parser, accumulate_characters);
515 
516   if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE)
517       != XML_STATUS_OK) {
518     xml_failure(parser);
519   }
520 
521   XML_ParserFree(parser);
522 
523   CharData_CheckXMLChars(&storage,
524   /* clang-format off */
525 #if XML_GE == 1
526                          XCS("e1=v1\n")
527                          XCS("e2=(null)\n")
528                          XCS("(r(a1=[v1]))\n")
529                          XCS("[v1][][&'><\"]")
530 #else
531                          XCS("e1=&amp;e1;\n")
532                          XCS("e2=(null)\n")
533                          XCS("(r(a1=[&e1;]))\n")
534                          XCS("[&e1;][&e2;][&'><\"]")
535 #endif
536   );
537   /* clang-format on */
538 }
539 END_TEST
540 
541 static void XMLCALL
resumable_stopping_character_handler(void * userData,const XML_Char * s,int len)542 resumable_stopping_character_handler(void *userData, const XML_Char *s,
543                                      int len) {
544   UNUSED_P(s);
545   UNUSED_P(len);
546   XML_Parser parser = (XML_Parser)userData;
547   XML_StopParser(parser, XML_TRUE);
548 }
549 
550 // NOTE: This test needs active LeakSanitizer to be of actual use
START_TEST(test_misc_char_handler_stop_without_leak)551 START_TEST(test_misc_char_handler_stop_without_leak) {
552   const char *const data
553       = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;";
554   XML_Parser parser = XML_ParserCreate(NULL);
555   assert_true(parser != NULL);
556   XML_SetUserData(parser, parser);
557   XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler);
558   _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE);
559   XML_ParserFree(parser);
560 }
561 END_TEST
562 
START_TEST(test_misc_resumeparser_not_crashing)563 START_TEST(test_misc_resumeparser_not_crashing) {
564   XML_Parser parser = XML_ParserCreate(NULL);
565   XML_GetBuffer(parser, 1);
566   XML_StopParser(parser, /*resumable=*/XML_TRUE);
567   XML_ResumeParser(parser); // could crash here, previously
568   XML_ParserFree(parser);
569 }
570 END_TEST
571 
START_TEST(test_misc_stopparser_rejects_unstarted_parser)572 START_TEST(test_misc_stopparser_rejects_unstarted_parser) {
573   const XML_Bool cases[] = {XML_TRUE, XML_FALSE};
574   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
575     const XML_Bool resumable = cases[i];
576     XML_Parser parser = XML_ParserCreate(NULL);
577     assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE);
578     assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR);
579     assert_true(XML_GetErrorCode(parser) == XML_ERROR_NOT_STARTED);
580     XML_ParserFree(parser);
581   }
582 }
583 END_TEST
584 
585 /* Adaptation of accumulate_characters that takes ExtHdlrData input to work with
586  * test_renter_loop_finite_content below */
587 void XMLCALL
accumulate_characters_ext_handler(void * userData,const XML_Char * s,int len)588 accumulate_characters_ext_handler(void *userData, const XML_Char *s, int len) {
589   ExtHdlrData *const test_data = (ExtHdlrData *)userData;
590   CharData_AppendXMLChars(test_data->storage, s, len);
591 }
592 
593 /* Test that internalEntityProcessor does not re-enter forever;
594  * based on files tests/xmlconf/xmltest/valid/ext-sa/012.{xml,ent} */
START_TEST(test_renter_loop_finite_content)595 START_TEST(test_renter_loop_finite_content) {
596   CharData storage;
597   CharData_Init(&storage);
598   const char *const text = "<!DOCTYPE doc [\n"
599                            "<!ENTITY e1 '&e2;'>\n"
600                            "<!ENTITY e2 '&e3;'>\n"
601                            "<!ENTITY e3 SYSTEM '012.ent'>\n"
602                            "<!ENTITY e4 '&e5;'>\n"
603                            "<!ENTITY e5 '(e5)'>\n"
604                            "<!ELEMENT doc (#PCDATA)>\n"
605                            "]>\n"
606                            "<doc>&e1;</doc>\n";
607   ExtHdlrData test_data = {"&e4;\n", external_entity_null_loader, &storage};
608   const XML_Char *const expected = XCS("(e5)\n");
609 
610   XML_Parser parser = XML_ParserCreate(NULL);
611   assert_true(parser != NULL);
612   XML_SetUserData(parser, &test_data);
613   XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader);
614   XML_SetCharacterDataHandler(parser, accumulate_characters_ext_handler);
615   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
616       == XML_STATUS_ERROR)
617     xml_failure(parser);
618 
619   CharData_CheckXMLChars(&storage, expected);
620   XML_ParserFree(parser);
621 }
622 END_TEST
623 
624 // Inspired by function XML_OriginalString of Perl's XML::Parser
625 static char *
dup_original_string(XML_Parser parser)626 dup_original_string(XML_Parser parser) {
627   const int byte_count = XML_GetCurrentByteCount(parser);
628 
629   assert_true(byte_count >= 0);
630 
631   int offset = -1;
632   int size = -1;
633 
634   const char *const context = XML_GetInputContext(parser, &offset, &size);
635 
636 #if XML_CONTEXT_BYTES > 0
637   assert_true(context != NULL);
638   assert_true(offset >= 0);
639   assert_true(size >= 0);
640   return portable_strndup(context + offset, byte_count);
641 #else
642   assert_true(context == NULL);
643   return NULL;
644 #endif
645 }
646 
647 static void
on_characters_issue_980(void * userData,const XML_Char * s,int len)648 on_characters_issue_980(void *userData, const XML_Char *s, int len) {
649   (void)s;
650   (void)len;
651   XML_Parser parser = (XML_Parser)userData;
652 
653   char *const original_string = dup_original_string(parser);
654 
655 #if XML_CONTEXT_BYTES > 0
656   assert_true(original_string != NULL);
657   assert_true(strcmp(original_string, "&draft.day;") == 0);
658   free(original_string);
659 #else
660   assert_true(original_string == NULL);
661 #endif
662 }
663 
START_TEST(test_misc_expected_event_ptr_issue_980)664 START_TEST(test_misc_expected_event_ptr_issue_980) {
665   // NOTE: This is a tiny subset of sample "REC-xml-19980210.xml"
666   //       from Perl's XML::Parser
667   const char *const doc = "<!DOCTYPE day [\n"
668                           "  <!ENTITY draft.day '10'>\n"
669                           "]>\n"
670                           "<day>&draft.day;</day>\n";
671 
672   XML_Parser parser = XML_ParserCreate(NULL);
673   XML_SetUserData(parser, parser);
674   XML_SetCharacterDataHandler(parser, on_characters_issue_980);
675 
676   assert_true(_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
677                                       /*isFinal=*/XML_TRUE)
678               == XML_STATUS_OK);
679 
680   XML_ParserFree(parser);
681 }
682 END_TEST
683 
START_TEST(test_misc_sync_entity_tolerated)684 START_TEST(test_misc_sync_entity_tolerated) {
685   const char *const doc = "<!DOCTYPE t0 [\n"
686                           "   <!ENTITY a '<t1></t1>'>\n"
687                           "   <!ENTITY b '<t2>two</t2>'>\n"
688                           "   <!ENTITY c '<t3>three<t4>four</t4>three</t3>'>\n"
689                           "   <!ENTITY d '<t5>&b;</t5>'>\n"
690                           "]>\n"
691                           "<t0>&a;&b;&c;&d;</t0>\n";
692   XML_Parser parser = XML_ParserCreate(NULL);
693 
694   assert_true(_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
695                                       /*isFinal=*/XML_TRUE)
696               == XML_STATUS_OK);
697 
698   XML_ParserFree(parser);
699 }
700 END_TEST
701 
START_TEST(test_misc_async_entity_rejected)702 START_TEST(test_misc_async_entity_rejected) {
703   struct test_case {
704     const char *doc;
705     enum XML_Status expectedStatusNoGE;
706     enum XML_Error expectedErrorNoGE;
707     XML_Size expectedErrorLine;
708     XML_Size expectedErrorColumn;
709   };
710   const struct test_case cases[] = {
711       // Opened by one entity, closed by another
712       {"<!DOCTYPE t0 [\n"
713        "   <!ENTITY open '<t1>'>\n"
714        "   <!ENTITY close '</t1>'>\n"
715        "]>\n"
716        "<t0>&open;&close;</t0>\n",
717        XML_STATUS_OK, XML_ERROR_NONE, 5, 4},
718       // Opened by tag, closed by entity (non-root case)
719       {"<!DOCTYPE t0 [\n"
720        "  <!ENTITY g0 ''>\n"
721        "  <!ENTITY g1 '&g0;</t1>'>\n"
722        "]>\n"
723        "<t0><t1>&g1;</t0>\n",
724        XML_STATUS_ERROR, XML_ERROR_TAG_MISMATCH, 5, 8},
725       // Opened by tag, closed by entity (root case)
726       {"<!DOCTYPE t0 [\n"
727        "  <!ENTITY g0 ''>\n"
728        "  <!ENTITY g1 '&g0;</t0>'>\n"
729        "]>\n"
730        "<t0>&g1;\n",
731        XML_STATUS_ERROR, XML_ERROR_NO_ELEMENTS, 5, 4},
732       // Opened by entity, closed by tag <-- regression from 2.7.0
733       {"<!DOCTYPE t0 [\n"
734        "  <!ENTITY g0 ''>\n"
735        "  <!ENTITY g1 '<t1>&g0;'>\n"
736        "]>\n"
737        "<t0>&g1;</t1></t0>\n",
738        XML_STATUS_ERROR, XML_ERROR_TAG_MISMATCH, 5, 4},
739       // Opened by tag, closed by entity; then the other way around
740       {"<!DOCTYPE t0 [\n"
741        "  <!ENTITY open '<t1>'>\n"
742        "  <!ENTITY close '</t1>'>\n"
743        "]>\n"
744        "<t0><t1>&close;&open;</t1></t0>\n",
745        XML_STATUS_OK, XML_ERROR_NONE, 5, 8},
746   };
747 
748   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
749     const struct test_case testCase = cases[i];
750     set_subtest("cases[%d]", (int)i);
751 
752     const char *const doc = testCase.doc;
753 #if XML_GE == 1
754     const enum XML_Status expectedStatus = XML_STATUS_ERROR;
755     const enum XML_Error expectedError = XML_ERROR_ASYNC_ENTITY;
756 #else
757     const enum XML_Status expectedStatus = testCase.expectedStatusNoGE;
758     const enum XML_Error expectedError = testCase.expectedErrorNoGE;
759 #endif
760 
761     XML_Parser parser = XML_ParserCreate(NULL);
762     assert_true(_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
763                                         /*isFinal=*/XML_TRUE)
764                 == expectedStatus);
765     assert_true(XML_GetErrorCode(parser) == expectedError);
766 #if XML_GE == 1
767     assert_true(XML_GetCurrentLineNumber(parser) == testCase.expectedErrorLine);
768     assert_true(XML_GetCurrentColumnNumber(parser)
769                 == testCase.expectedErrorColumn);
770 #endif
771     XML_ParserFree(parser);
772   }
773 }
774 END_TEST
775 
START_TEST(test_misc_no_infinite_loop_issue_1161)776 START_TEST(test_misc_no_infinite_loop_issue_1161) {
777   XML_Parser parser = XML_ParserCreate(NULL);
778 
779   const char *text = "<!DOCTYPE d SYSTEM 'secondary.txt'>";
780 
781   struct ExtOption options[] = {
782       {XCS("secondary.txt"),
783        "<!ENTITY % p SYSTEM 'tertiary.txt'><!ENTITY g '%p;'>"},
784       {XCS("tertiary.txt"), "<?xml version='1.0'?><a"},
785       {NULL, NULL},
786   };
787 
788   XML_SetUserData(parser, options);
789   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
790   XML_SetExternalEntityRefHandler(parser, external_entity_optioner);
791 
792   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
793               == XML_STATUS_ERROR);
794 
795 #if defined(XML_DTD)
796   assert_true(XML_GetErrorCode(parser) == XML_ERROR_EXTERNAL_ENTITY_HANDLING);
797 #else
798   assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
799 #endif
800 
801   XML_ParserFree(parser);
802 }
803 END_TEST
804 
805 void
make_miscellaneous_test_case(Suite * s)806 make_miscellaneous_test_case(Suite *s) {
807   TCase *tc_misc = tcase_create("miscellaneous tests");
808 
809   suite_add_tcase(s, tc_misc);
810   tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
811 
812   tcase_add_test(tc_misc, test_misc_alloc_create_parser);
813   tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding);
814   tcase_add_test(tc_misc, test_misc_null_parser);
815   tcase_add_test(tc_misc, test_misc_error_string);
816   tcase_add_test(tc_misc, test_misc_version);
817   tcase_add_test(tc_misc, test_misc_features);
818   tcase_add_test(tc_misc, test_misc_attribute_leak);
819   tcase_add_test(tc_misc, test_misc_utf16le);
820   tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1);
821   tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2);
822   tcase_add_test__ifdef_xml_dtd(
823       tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317);
824   tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak);
825   tcase_add_test(tc_misc,
826                  test_misc_create_external_entity_parser_with_null_context);
827   tcase_add_test(tc_misc, test_misc_general_entities_support);
828   tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
829   tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing);
830   tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser);
831   tcase_add_test__if_xml_ge(tc_misc, test_renter_loop_finite_content);
832   tcase_add_test(tc_misc, test_misc_expected_event_ptr_issue_980);
833   tcase_add_test(tc_misc, test_misc_sync_entity_tolerated);
834   tcase_add_test(tc_misc, test_misc_async_entity_rejected);
835   tcase_add_test(tc_misc, test_misc_no_infinite_loop_issue_1161);
836 }
837