xref: /freebsd/contrib/expat/tests/misc_tests.c (revision 908f215e80fa482aa953c39afa6bb516f561fc00)
1 /* Tests in the "miscellaneous" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
22    Licensed under the MIT license:
23 
24    Permission is  hereby granted,  free of charge,  to any  person obtaining
25    a  copy  of  this  software   and  associated  documentation  files  (the
26    "Software"),  to  deal in  the  Software  without restriction,  including
27    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28    distribute, sublicense, and/or sell copies of the Software, and to permit
29    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30    following conditions:
31 
32    The above copyright  notice and this permission notice  shall be included
33    in all copies or substantial portions of the Software.
34 
35    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41    USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43 
44 #if defined(NDEBUG)
45 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47 
48 #include <assert.h>
49 #include <string.h>
50 
51 #include "expat_config.h"
52 
53 #include "expat.h"
54 #include "internal.h"
55 #include "minicheck.h"
56 #include "memcheck.h"
57 #include "common.h"
58 #include "ascii.h" /* for ASCII_xxx */
59 #include "handlers.h"
60 #include "misc_tests.h"
61 
62 /* Test that a failure to allocate the parser structure fails gracefully */
63 START_TEST(test_misc_alloc_create_parser) {
64   XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
65   unsigned int i;
66   const unsigned int max_alloc_count = 10;
67 
68   /* Something this simple shouldn't need more than 10 allocations */
69   for (i = 0; i < max_alloc_count; i++) {
70     g_allocation_count = i;
71     g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL);
72     if (g_parser != NULL)
73       break;
74   }
75   if (i == 0)
76     fail("Parser unexpectedly ignored failing allocator");
77   else if (i == max_alloc_count)
78     fail("Parser not created with max allocation count");
79 }
80 END_TEST
81 
82 /* Test memory allocation failures for a parser with an encoding */
83 START_TEST(test_misc_alloc_create_parser_with_encoding) {
84   XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
85   unsigned int i;
86   const unsigned int max_alloc_count = 10;
87 
88   /* Try several levels of allocation */
89   for (i = 0; i < max_alloc_count; i++) {
90     g_allocation_count = i;
91     g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL);
92     if (g_parser != NULL)
93       break;
94   }
95   if (i == 0)
96     fail("Parser ignored failing allocator");
97   else if (i == max_alloc_count)
98     fail("Parser not created with max allocation count");
99 }
100 END_TEST
101 
102 /* Test that freeing a NULL parser doesn't cause an explosion.
103  * (Not actually tested anywhere else)
104  */
105 START_TEST(test_misc_null_parser) {
106   XML_ParserFree(NULL);
107 }
108 END_TEST
109 
/* EXPAT_TESTS_UBSAN is 1 when the compiler provides __has_feature and
 * reports undefined_behavior_sanitizer through it; otherwise it is 0.
 */
#define EXPAT_TESTS_UBSAN 0
#if defined(__has_feature)
#  if __has_feature(undefined_behavior_sanitizer)
#    undef EXPAT_TESTS_UBSAN
#    define EXPAT_TESTS_UBSAN 1
#  endif
#endif
119 
120 /* Test that XML_ErrorString rejects out-of-range codes */
121 START_TEST(test_misc_error_string) {
122 #if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan
123   union {
124     enum XML_Error xml_error;
125     int integer;
126   } trickery;
127 
128   assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test
129 
130   trickery.integer = -1;
131   if (XML_ErrorString(trickery.xml_error) != NULL)
132     fail("Negative error code not rejected");
133 
134   trickery.integer = 100;
135   if (XML_ErrorString(trickery.xml_error) != NULL)
136     fail("Large error code not rejected");
137 #endif
138 }
139 END_TEST
140 
141 /* Test the version information is consistent */
142 
143 /* Since we are working in XML_LChars (potentially 16-bits), we
144  * can't use the standard C library functions for character
145  * manipulation and have to roll our own.
146  */
147 static int
148 parse_version(const XML_LChar *version_text,
149               XML_Expat_Version *version_struct) {
150   if (! version_text)
151     return XML_FALSE;
152 
153   while (*version_text != 0x00) {
154     if (*version_text >= ASCII_0 && *version_text <= ASCII_9)
155       break;
156     version_text++;
157   }
158   if (*version_text == 0x00)
159     return XML_FALSE;
160 
161   /* version_struct->major = strtoul(version_text, 10, &version_text) */
162   version_struct->major = 0;
163   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
164     version_struct->major
165         = 10 * version_struct->major + (*version_text++ - ASCII_0);
166   }
167   if (*version_text++ != ASCII_PERIOD)
168     return XML_FALSE;
169 
170   /* Now for the minor version number */
171   version_struct->minor = 0;
172   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
173     version_struct->minor
174         = 10 * version_struct->minor + (*version_text++ - ASCII_0);
175   }
176   if (*version_text++ != ASCII_PERIOD)
177     return XML_FALSE;
178 
179   /* Finally the micro version number */
180   version_struct->micro = 0;
181   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
182     version_struct->micro
183         = 10 * version_struct->micro + (*version_text++ - ASCII_0);
184   }
185   if (*version_text != 0x00)
186     return XML_FALSE;
187   return XML_TRUE;
188 }
189 
190 static int
191 versions_equal(const XML_Expat_Version *first,
192                const XML_Expat_Version *second) {
193   return (first->major == second->major && first->minor == second->minor
194           && first->micro == second->micro);
195 }
196 
197 START_TEST(test_misc_version) {
198   XML_Expat_Version read_version = XML_ExpatVersionInfo();
199   /* Silence compiler warning with the following assignment */
200   XML_Expat_Version parsed_version = {0, 0, 0};
201   const XML_LChar *version_text = XML_ExpatVersion();
202 
203   if (version_text == NULL)
204     fail("Could not obtain version text");
205   assert(version_text != NULL);
206   if (! parse_version(version_text, &parsed_version))
207     fail("Unable to parse version text");
208   if (! versions_equal(&read_version, &parsed_version))
209     fail("Version mismatch");
210 
211   if (xcstrcmp(version_text, XCS("expat_2.6.4"))) /* needs bump on releases */
212     fail("XML_*_VERSION in expat.h out of sync?\n");
213 }
214 END_TEST
215 
216 /* Test feature information */
217 START_TEST(test_misc_features) {
218   const XML_Feature *features = XML_GetFeatureList();
219 
220   /* Prevent problems with double-freeing parsers */
221   g_parser = NULL;
222   if (features == NULL) {
223     fail("Failed to get feature information");
224   } else {
225     /* Loop through the features checking what we can */
226     while (features->feature != XML_FEATURE_END) {
227       switch (features->feature) {
228       case XML_FEATURE_SIZEOF_XML_CHAR:
229         if (features->value != sizeof(XML_Char))
230           fail("Incorrect size of XML_Char");
231         break;
232       case XML_FEATURE_SIZEOF_XML_LCHAR:
233         if (features->value != sizeof(XML_LChar))
234           fail("Incorrect size of XML_LChar");
235         break;
236       default:
237         break;
238       }
239       features++;
240     }
241   }
242 }
243 END_TEST
244 
245 /* Regression test for GitHub Issue #17: memory leak parsing attribute
246  * values with mixed bound and unbound namespaces.
247  */
248 START_TEST(test_misc_attribute_leak) {
249   const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>";
250   XML_Memory_Handling_Suite memsuite
251       = {tracking_malloc, tracking_realloc, tracking_free};
252 
253   g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n"));
254   expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found");
255   XML_ParserFree(g_parser);
256   /* Prevent the teardown trying to double free */
257   g_parser = NULL;
258 
259   if (! tracking_report())
260     fail("Memory leak found");
261 }
262 END_TEST
263 
264 /* Test parser created for UTF-16LE is successful */
265 START_TEST(test_misc_utf16le) {
266   const char text[] =
267       /* <?xml version='1.0'?><q>Hi</q> */
268       "<\0?\0x\0m\0l\0 \0"
269       "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0"
270       "<\0q\0>\0H\0i\0<\0/\0q\0>\0";
271   const XML_Char *expected = XCS("Hi");
272   CharData storage;
273 
274   g_parser = XML_ParserCreate(XCS("UTF-16LE"));
275   if (g_parser == NULL)
276     fail("Parser not created");
277 
278   CharData_Init(&storage);
279   XML_SetUserData(g_parser, &storage);
280   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
281   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
282       == XML_STATUS_ERROR)
283     xml_failure(g_parser);
284   CharData_CheckXMLChars(&storage, expected);
285 }
286 END_TEST
287 
288 START_TEST(test_misc_stop_during_end_handler_issue_240_1) {
289   XML_Parser parser;
290   DataIssue240 *mydata;
291   enum XML_Status result;
292   const char *const doc1 = "<doc><e1/><e><foo/></e></doc>";
293 
294   parser = XML_ParserCreate(NULL);
295   XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
296   mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
297   mydata->parser = parser;
298   mydata->deep = 0;
299   XML_SetUserData(parser, mydata);
300 
301   result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1);
302   XML_ParserFree(parser);
303   free(mydata);
304   if (result != XML_STATUS_ERROR)
305     fail("Stopping the parser did not work as expected");
306 }
307 END_TEST
308 
309 START_TEST(test_misc_stop_during_end_handler_issue_240_2) {
310   XML_Parser parser;
311   DataIssue240 *mydata;
312   enum XML_Status result;
313   const char *const doc2 = "<doc><elem/></doc>";
314 
315   parser = XML_ParserCreate(NULL);
316   XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
317   mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
318   mydata->parser = parser;
319   mydata->deep = 0;
320   XML_SetUserData(parser, mydata);
321 
322   result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1);
323   XML_ParserFree(parser);
324   free(mydata);
325   if (result != XML_STATUS_ERROR)
326     fail("Stopping the parser did not work as expected");
327 }
328 END_TEST
329 
330 START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
331   const char *const inputOne = "<!DOCTYPE d [\n"
332                                "<!ENTITY % e ']><d/>'>\n"
333                                "\n"
334                                "%e;";
335   const char *const inputTwo
336       = "<!DOCTYPE d [\n"
337         "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '&#37;e1;'>\n"
338         "\n"
339         "%e2;";
340   const char *const inputThree = "<!DOCTYPE d [\n"
341                                  "<!ENTITY % e ']><d'>\n"
342                                  "\n"
343                                  "%e;/>";
344   const char *const inputIssue317 = "<!DOCTYPE doc [\n"
345                                     "<!ENTITY % foo ']>\n"
346                                     "<doc>Hell<oc (#PCDATA)*>'>\n"
347                                     "%foo;\n"
348                                     "]>\n"
349                                     "<doc>Hello, world</dVc>";
350 
351   const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
352   size_t inputIndex = 0;
353 
354   for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
355     set_subtest("%s", inputs[inputIndex]);
356     XML_Parser parser;
357     enum XML_Status parseResult;
358     int setParamEntityResult;
359     XML_Size lineNumber;
360     XML_Size columnNumber;
361     const char *const input = inputs[inputIndex];
362 
363     parser = XML_ParserCreate(NULL);
364     setParamEntityResult
365         = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
366     if (setParamEntityResult != 1)
367       fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
368 
369     parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
370     if (parseResult != XML_STATUS_ERROR) {
371       parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
372       if (parseResult != XML_STATUS_ERROR) {
373         fail("Parsing was expected to fail but succeeded.");
374       }
375     }
376 
377     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
378       fail("Error code does not match XML_ERROR_INVALID_TOKEN");
379 
380     lineNumber = XML_GetCurrentLineNumber(parser);
381     if (lineNumber != 4)
382       fail("XML_GetCurrentLineNumber does not work as expected.");
383 
384     columnNumber = XML_GetCurrentColumnNumber(parser);
385     if (columnNumber != 0)
386       fail("XML_GetCurrentColumnNumber does not work as expected.");
387 
388     XML_ParserFree(parser);
389   }
390 }
391 END_TEST
392 
393 START_TEST(test_misc_tag_mismatch_reset_leak) {
394 #ifdef XML_NS
395   const char *const text = "<open xmlns='https://namespace1.test'></close>";
396   XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n'));
397 
398   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
399       != XML_STATUS_ERROR)
400     fail("Call to parse was expected to fail");
401   if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
402     fail("Call to parse was expected to fail from a closing tag mismatch");
403 
404   XML_ParserReset(parser, NULL);
405 
406   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
407       != XML_STATUS_ERROR)
408     fail("Call to parse was expected to fail");
409   if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
410     fail("Call to parse was expected to fail from a closing tag mismatch");
411 
412   XML_ParserFree(parser);
413 #endif
414 }
415 END_TEST
416 
417 START_TEST(test_misc_create_external_entity_parser_with_null_context) {
418   // With XML_DTD undefined, the only supported case of external entities
419   // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context
420   // was causing a segfault through a null pointer dereference in function
421   // setContext, previously.
422   XML_Parser parser = XML_ParserCreate(NULL);
423   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
424 #ifdef XML_DTD
425   assert_true(ext_parser != NULL);
426   XML_ParserFree(ext_parser);
427 #else
428   assert_true(ext_parser == NULL);
429 #endif /* XML_DTD */
430   XML_ParserFree(parser);
431 }
432 END_TEST
433 
434 START_TEST(test_misc_general_entities_support) {
435   const char *const doc
436       = "<!DOCTYPE r [\n"
437         "<!ENTITY e1 'v1'>\n"
438         "<!ENTITY e2 SYSTEM 'v2'>\n"
439         "]>\n"
440         "<r a1='[&e1;]'>[&e1;][&e2;][&amp;&apos;&gt;&lt;&quot;]</r>";
441 
442   CharData storage;
443   CharData_Init(&storage);
444 
445   XML_Parser parser = XML_ParserCreate(NULL);
446   XML_SetUserData(parser, &storage);
447   XML_SetStartElementHandler(parser, accumulate_start_element);
448   XML_SetExternalEntityRefHandler(parser,
449                                   external_entity_failer__if_not_xml_ge);
450   XML_SetEntityDeclHandler(parser, accumulate_entity_decl);
451   XML_SetCharacterDataHandler(parser, accumulate_characters);
452 
453   if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE)
454       != XML_STATUS_OK) {
455     xml_failure(parser);
456   }
457 
458   XML_ParserFree(parser);
459 
460   CharData_CheckXMLChars(&storage,
461   /* clang-format off */
462 #if XML_GE == 1
463                          XCS("e1=v1\n")
464                          XCS("e2=(null)\n")
465                          XCS("(r(a1=[v1]))\n")
466                          XCS("[v1][][&'><\"]")
467 #else
468                          XCS("e1=&amp;e1;\n")
469                          XCS("e2=(null)\n")
470                          XCS("(r(a1=[&e1;]))\n")
471                          XCS("[&e1;][&e2;][&'><\"]")
472 #endif
473   );
474   /* clang-format on */
475 }
476 END_TEST
477 
478 static void XMLCALL
479 resumable_stopping_character_handler(void *userData, const XML_Char *s,
480                                      int len) {
481   UNUSED_P(s);
482   UNUSED_P(len);
483   XML_Parser parser = (XML_Parser)userData;
484   XML_StopParser(parser, XML_TRUE);
485 }
486 
487 // NOTE: This test needs active LeakSanitizer to be of actual use
488 START_TEST(test_misc_char_handler_stop_without_leak) {
489   const char *const data
490       = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;";
491   XML_Parser parser = XML_ParserCreate(NULL);
492   assert_true(parser != NULL);
493   XML_SetUserData(parser, parser);
494   XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler);
495   _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE);
496   XML_ParserFree(parser);
497 }
498 END_TEST
499 
500 START_TEST(test_misc_resumeparser_not_crashing) {
501   XML_Parser parser = XML_ParserCreate(NULL);
502   XML_GetBuffer(parser, 1);
503   XML_StopParser(parser, /*resumable=*/XML_TRUE);
504   XML_ResumeParser(parser); // could crash here, previously
505   XML_ParserFree(parser);
506 }
507 END_TEST
508 
509 START_TEST(test_misc_stopparser_rejects_unstarted_parser) {
510   const XML_Bool cases[] = {XML_TRUE, XML_FALSE};
511   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
512     const XML_Bool resumable = cases[i];
513     XML_Parser parser = XML_ParserCreate(NULL);
514     assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE);
515     assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR);
516     assert_true(XML_GetErrorCode(parser) == XML_ERROR_NOT_STARTED);
517     XML_ParserFree(parser);
518   }
519 }
520 END_TEST
521 
/* Create the "miscellaneous tests" test case, attach it to suite s and
 * register every test defined in this file with it.
 *
 * The test case is given no setup function and basic_teardown as its
 * teardown function.
 * NOTE(review): basic_teardown is defined elsewhere; presumably it frees
 * g_parser, which is why several tests reset that global -- confirm.
 */
void
make_miscellaneous_test_case(Suite *s) {
  TCase *tc_misc = tcase_create("miscellaneous tests");

  suite_add_tcase(s, tc_misc);
  tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);

  /* Tests are registered in the order they are defined above */
  tcase_add_test(tc_misc, test_misc_alloc_create_parser);
  tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding);
  tcase_add_test(tc_misc, test_misc_null_parser);
  tcase_add_test(tc_misc, test_misc_error_string);
  tcase_add_test(tc_misc, test_misc_version);
  tcase_add_test(tc_misc, test_misc_features);
  tcase_add_test(tc_misc, test_misc_attribute_leak);
  tcase_add_test(tc_misc, test_misc_utf16le);
  tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1);
  tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2);
  /* NOTE(review): going by its name, this variant presumably registers the
   * test only in builds with XML_DTD defined -- confirm where it is defined.
   */
  tcase_add_test__ifdef_xml_dtd(
      tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317);
  tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak);
  tcase_add_test(tc_misc,
                 test_misc_create_external_entity_parser_with_null_context);
  tcase_add_test(tc_misc, test_misc_general_entities_support);
  tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
  tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing);
  tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser);
}
549