1 /* Tests in the "miscellaneous" test case for the Expat test suite 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 15 Copyright (c) 2017 Joe Orton <jorton@redhat.com> 16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com> 20 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 21 Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> 22 Licensed under the MIT license: 23 24 Permission is hereby granted, free of charge, to any person obtaining 25 a copy of this software and associated documentation files (the 26 "Software"), to deal in the Software without restriction, including 27 without limitation the rights to use, copy, modify, merge, publish, 28 distribute, sublicense, and/or sell copies of the Software, and to permit 29 persons to whom the Software is furnished to do so, subject to the 30 following conditions: 31 32 The above copyright notice and this permission notice shall be included 33 in all copies or substantial portions of the Software. 34 35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 41 USE OR OTHER DEALINGS IN THE SOFTWARE. 42 */ 43 44 #if defined(NDEBUG) 45 # undef NDEBUG /* because test suite relies on assert(...) at the moment */ 46 #endif 47 48 #include <assert.h> 49 #include <string.h> 50 51 #include "expat_config.h" 52 53 #include "expat.h" 54 #include "internal.h" 55 #include "minicheck.h" 56 #include "memcheck.h" 57 #include "common.h" 58 #include "ascii.h" /* for ASCII_xxx */ 59 #include "handlers.h" 60 #include "misc_tests.h" 61 62 /* Test that a failure to allocate the parser structure fails gracefully */ 63 START_TEST(test_misc_alloc_create_parser) { 64 XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free}; 65 unsigned int i; 66 const unsigned int max_alloc_count = 10; 67 68 /* Something this simple shouldn't need more than 10 allocations */ 69 for (i = 0; i < max_alloc_count; i++) { 70 g_allocation_count = i; 71 g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL); 72 if (g_parser != NULL) 73 break; 74 } 75 if (i == 0) 76 fail("Parser unexpectedly ignored failing allocator"); 77 else if (i == max_alloc_count) 78 fail("Parser not created with max allocation count"); 79 } 80 END_TEST 81 82 /* Test memory allocation failures for a parser with an encoding */ 83 START_TEST(test_misc_alloc_create_parser_with_encoding) { 84 XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free}; 85 unsigned int i; 86 const unsigned int max_alloc_count = 10; 87 88 /* Try several levels of allocation */ 89 for (i = 0; i < max_alloc_count; i++) { 90 g_allocation_count = i; 91 g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL); 92 if (g_parser != NULL) 93 break; 94 } 95 if (i == 0) 96 fail("Parser ignored failing allocator"); 97 else if (i == max_alloc_count) 98 fail("Parser not created with max allocation count"); 99 } 100 END_TEST 101 102 /* Test that freeing a NULL parser doesn't cause an explosion. 103 * (Not actually tested anywhere else) 104 */ 105 START_TEST(test_misc_null_parser) { 106 XML_ParserFree(NULL); 107 } 108 END_TEST 109 110 #if defined(__has_feature) 111 # if __has_feature(undefined_behavior_sanitizer) 112 # define EXPAT_TESTS_UBSAN 1 113 # else 114 # define EXPAT_TESTS_UBSAN 0 115 # endif 116 #else 117 # define EXPAT_TESTS_UBSAN 0 118 #endif 119 120 /* Test that XML_ErrorString rejects out-of-range codes */ 121 START_TEST(test_misc_error_string) { 122 #if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan 123 union { 124 enum XML_Error xml_error; 125 int integer; 126 } trickery; 127 128 assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test 129 130 trickery.integer = -1; 131 if (XML_ErrorString(trickery.xml_error) != NULL) 132 fail("Negative error code not rejected"); 133 134 trickery.integer = 100; 135 if (XML_ErrorString(trickery.xml_error) != NULL) 136 fail("Large error code not rejected"); 137 #endif 138 } 139 END_TEST 140 141 /* Test the version information is consistent */ 142 143 /* Since we are working in XML_LChars (potentially 16-bits), we 144 * can't use the standard C library functions for character 145 * manipulation and have to roll our own. 146 */ 147 static int 148 parse_version(const XML_LChar *version_text, 149 XML_Expat_Version *version_struct) { 150 if (! version_text) 151 return XML_FALSE; 152 153 while (*version_text != 0x00) { 154 if (*version_text >= ASCII_0 && *version_text <= ASCII_9) 155 break; 156 version_text++; 157 } 158 if (*version_text == 0x00) 159 return XML_FALSE; 160 161 /* version_struct->major = strtoul(version_text, 10, &version_text) */ 162 version_struct->major = 0; 163 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { 164 version_struct->major 165 = 10 * version_struct->major + (*version_text++ - ASCII_0); 166 } 167 if (*version_text++ != ASCII_PERIOD) 168 return XML_FALSE; 169 170 /* Now for the minor version number */ 171 version_struct->minor = 0; 172 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { 173 version_struct->minor 174 = 10 * version_struct->minor + (*version_text++ - ASCII_0); 175 } 176 if (*version_text++ != ASCII_PERIOD) 177 return XML_FALSE; 178 179 /* Finally the micro version number */ 180 version_struct->micro = 0; 181 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { 182 version_struct->micro 183 = 10 * version_struct->micro + (*version_text++ - ASCII_0); 184 } 185 if (*version_text != 0x00) 186 return XML_FALSE; 187 return XML_TRUE; 188 } 189 190 static int 191 versions_equal(const XML_Expat_Version *first, 192 const XML_Expat_Version *second) { 193 return (first->major == second->major && first->minor == second->minor 194 && first->micro == second->micro); 195 } 196 197 START_TEST(test_misc_version) { 198 XML_Expat_Version read_version = XML_ExpatVersionInfo(); 199 /* Silence compiler warning with the following assignment */ 200 XML_Expat_Version parsed_version = {0, 0, 0}; 201 const XML_LChar *version_text = XML_ExpatVersion(); 202 203 if (version_text == NULL) 204 fail("Could not obtain version text"); 205 assert(version_text != NULL); 206 if (! parse_version(version_text, &parsed_version)) 207 fail("Unable to parse version text"); 208 if (! versions_equal(&read_version, &parsed_version)) 209 fail("Version mismatch"); 210 211 if (xcstrcmp(version_text, XCS("expat_2.6.4"))) /* needs bump on releases */ 212 fail("XML_*_VERSION in expat.h out of sync?\n"); 213 } 214 END_TEST 215 216 /* Test feature information */ 217 START_TEST(test_misc_features) { 218 const XML_Feature *features = XML_GetFeatureList(); 219 220 /* Prevent problems with double-freeing parsers */ 221 g_parser = NULL; 222 if (features == NULL) { 223 fail("Failed to get feature information"); 224 } else { 225 /* Loop through the features checking what we can */ 226 while (features->feature != XML_FEATURE_END) { 227 switch (features->feature) { 228 case XML_FEATURE_SIZEOF_XML_CHAR: 229 if (features->value != sizeof(XML_Char)) 230 fail("Incorrect size of XML_Char"); 231 break; 232 case XML_FEATURE_SIZEOF_XML_LCHAR: 233 if (features->value != sizeof(XML_LChar)) 234 fail("Incorrect size of XML_LChar"); 235 break; 236 default: 237 break; 238 } 239 features++; 240 } 241 } 242 } 243 END_TEST 244 245 /* Regression test for GitHub Issue #17: memory leak parsing attribute 246 * values with mixed bound and unbound namespaces. 247 */ 248 START_TEST(test_misc_attribute_leak) { 249 const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>"; 250 XML_Memory_Handling_Suite memsuite 251 = {tracking_malloc, tracking_realloc, tracking_free}; 252 253 g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n")); 254 expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found"); 255 XML_ParserFree(g_parser); 256 /* Prevent the teardown trying to double free */ 257 g_parser = NULL; 258 259 if (! tracking_report()) 260 fail("Memory leak found"); 261 } 262 END_TEST 263 264 /* Test parser created for UTF-16LE is successful */ 265 START_TEST(test_misc_utf16le) { 266 const char text[] = 267 /* <?xml version='1.0'?><q>Hi</q> */ 268 "<\0?\0x\0m\0l\0 \0" 269 "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0" 270 "<\0q\0>\0H\0i\0<\0/\0q\0>\0"; 271 const XML_Char *expected = XCS("Hi"); 272 CharData storage; 273 274 g_parser = XML_ParserCreate(XCS("UTF-16LE")); 275 if (g_parser == NULL) 276 fail("Parser not created"); 277 278 CharData_Init(&storage); 279 XML_SetUserData(g_parser, &storage); 280 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 281 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 282 == XML_STATUS_ERROR) 283 xml_failure(g_parser); 284 CharData_CheckXMLChars(&storage, expected); 285 } 286 END_TEST 287 288 START_TEST(test_misc_stop_during_end_handler_issue_240_1) { 289 XML_Parser parser; 290 DataIssue240 *mydata; 291 enum XML_Status result; 292 const char *const doc1 = "<doc><e1/><e><foo/></e></doc>"; 293 294 parser = XML_ParserCreate(NULL); 295 XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240); 296 mydata = (DataIssue240 *)malloc(sizeof(DataIssue240)); 297 mydata->parser = parser; 298 mydata->deep = 0; 299 XML_SetUserData(parser, mydata); 300 301 result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1); 302 XML_ParserFree(parser); 303 free(mydata); 304 if (result != XML_STATUS_ERROR) 305 fail("Stopping the parser did not work as expected"); 306 } 307 END_TEST 308 309 START_TEST(test_misc_stop_during_end_handler_issue_240_2) { 310 XML_Parser parser; 311 DataIssue240 *mydata; 312 enum XML_Status result; 313 const char *const doc2 = "<doc><elem/></doc>"; 314 315 parser = XML_ParserCreate(NULL); 316 XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240); 317 mydata = (DataIssue240 *)malloc(sizeof(DataIssue240)); 318 mydata->parser = parser; 319 mydata->deep = 0; 320 XML_SetUserData(parser, mydata); 321 322 result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1); 323 XML_ParserFree(parser); 324 free(mydata); 325 if (result != XML_STATUS_ERROR) 326 fail("Stopping the parser did not work as expected"); 327 } 328 END_TEST 329 330 START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) { 331 const char *const inputOne = "<!DOCTYPE d [\n" 332 "<!ENTITY % e ']><d/>'>\n" 333 "\n" 334 "%e;"; 335 const char *const inputTwo 336 = "<!DOCTYPE d [\n" 337 "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '%e1;'>\n" 338 "\n" 339 "%e2;"; 340 const char *const inputThree = "<!DOCTYPE d [\n" 341 "<!ENTITY % e ']><d'>\n" 342 "\n" 343 "%e;/>"; 344 const char *const inputIssue317 = "<!DOCTYPE doc [\n" 345 "<!ENTITY % foo ']>\n" 346 "<doc>Hell<oc (#PCDATA)*>'>\n" 347 "%foo;\n" 348 "]>\n" 349 "<doc>Hello, world</dVc>"; 350 351 const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317}; 352 size_t inputIndex = 0; 353 354 for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) { 355 set_subtest("%s", inputs[inputIndex]); 356 XML_Parser parser; 357 enum XML_Status parseResult; 358 int setParamEntityResult; 359 XML_Size lineNumber; 360 XML_Size columnNumber; 361 const char *const input = inputs[inputIndex]; 362 363 parser = XML_ParserCreate(NULL); 364 setParamEntityResult 365 = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 366 if (setParamEntityResult != 1) 367 fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS."); 368 369 parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0); 370 if (parseResult != XML_STATUS_ERROR) { 371 parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1); 372 if (parseResult != XML_STATUS_ERROR) { 373 fail("Parsing was expected to fail but succeeded."); 374 } 375 } 376 377 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) 378 fail("Error code does not match XML_ERROR_INVALID_TOKEN"); 379 380 lineNumber = XML_GetCurrentLineNumber(parser); 381 if (lineNumber != 4) 382 fail("XML_GetCurrentLineNumber does not work as expected."); 383 384 columnNumber = XML_GetCurrentColumnNumber(parser); 385 if (columnNumber != 0) 386 fail("XML_GetCurrentColumnNumber does not work as expected."); 387 388 XML_ParserFree(parser); 389 } 390 } 391 END_TEST 392 393 START_TEST(test_misc_tag_mismatch_reset_leak) { 394 #ifdef XML_NS 395 const char *const text = "<open xmlns='https://namespace1.test'></close>"; 396 XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n')); 397 398 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 399 != XML_STATUS_ERROR) 400 fail("Call to parse was expected to fail"); 401 if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH) 402 fail("Call to parse was expected to fail from a closing tag mismatch"); 403 404 XML_ParserReset(parser, NULL); 405 406 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 407 != XML_STATUS_ERROR) 408 fail("Call to parse was expected to fail"); 409 if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH) 410 fail("Call to parse was expected to fail from a closing tag mismatch"); 411 412 XML_ParserFree(parser); 413 #endif 414 } 415 END_TEST 416 417 START_TEST(test_misc_create_external_entity_parser_with_null_context) { 418 // With XML_DTD undefined, the only supported case of external entities 419 // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context 420 // was causing a segfault through a null pointer dereference in function 421 // setContext, previously. 422 XML_Parser parser = XML_ParserCreate(NULL); 423 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 424 #ifdef XML_DTD 425 assert_true(ext_parser != NULL); 426 XML_ParserFree(ext_parser); 427 #else 428 assert_true(ext_parser == NULL); 429 #endif /* XML_DTD */ 430 XML_ParserFree(parser); 431 } 432 END_TEST 433 434 START_TEST(test_misc_general_entities_support) { 435 const char *const doc 436 = "<!DOCTYPE r [\n" 437 "<!ENTITY e1 'v1'>\n" 438 "<!ENTITY e2 SYSTEM 'v2'>\n" 439 "]>\n" 440 "<r a1='[&e1;]'>[&e1;][&e2;][&'><"]</r>"; 441 442 CharData storage; 443 CharData_Init(&storage); 444 445 XML_Parser parser = XML_ParserCreate(NULL); 446 XML_SetUserData(parser, &storage); 447 XML_SetStartElementHandler(parser, accumulate_start_element); 448 XML_SetExternalEntityRefHandler(parser, 449 external_entity_failer__if_not_xml_ge); 450 XML_SetEntityDeclHandler(parser, accumulate_entity_decl); 451 XML_SetCharacterDataHandler(parser, accumulate_characters); 452 453 if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE) 454 != XML_STATUS_OK) { 455 xml_failure(parser); 456 } 457 458 XML_ParserFree(parser); 459 460 CharData_CheckXMLChars(&storage, 461 /* clang-format off */ 462 #if XML_GE == 1 463 XCS("e1=v1\n") 464 XCS("e2=(null)\n") 465 XCS("(r(a1=[v1]))\n") 466 XCS("[v1][][&'><\"]") 467 #else 468 XCS("e1=&e1;\n") 469 XCS("e2=(null)\n") 470 XCS("(r(a1=[&e1;]))\n") 471 XCS("[&e1;][&e2;][&'><\"]") 472 #endif 473 ); 474 /* clang-format on */ 475 } 476 END_TEST 477 478 static void XMLCALL 479 resumable_stopping_character_handler(void *userData, const XML_Char *s, 480 int len) { 481 UNUSED_P(s); 482 UNUSED_P(len); 483 XML_Parser parser = (XML_Parser)userData; 484 XML_StopParser(parser, XML_TRUE); 485 } 486 487 // NOTE: This test needs active LeakSanitizer to be of actual use 488 START_TEST(test_misc_char_handler_stop_without_leak) { 489 const char *const data 490 = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;"; 491 XML_Parser parser = XML_ParserCreate(NULL); 492 assert_true(parser != NULL); 493 XML_SetUserData(parser, parser); 494 XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler); 495 _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE); 496 XML_ParserFree(parser); 497 } 498 END_TEST 499 500 START_TEST(test_misc_resumeparser_not_crashing) { 501 XML_Parser parser = XML_ParserCreate(NULL); 502 XML_GetBuffer(parser, 1); 503 XML_StopParser(parser, /*resumable=*/XML_TRUE); 504 XML_ResumeParser(parser); // could crash here, previously 505 XML_ParserFree(parser); 506 } 507 END_TEST 508 509 START_TEST(test_misc_stopparser_rejects_unstarted_parser) { 510 const XML_Bool cases[] = {XML_TRUE, XML_FALSE}; 511 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 512 const XML_Bool resumable = cases[i]; 513 XML_Parser parser = XML_ParserCreate(NULL); 514 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE); 515 assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR); 516 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NOT_STARTED); 517 XML_ParserFree(parser); 518 } 519 } 520 END_TEST 521 522 void 523 make_miscellaneous_test_case(Suite *s) { 524 TCase *tc_misc = tcase_create("miscellaneous tests"); 525 526 suite_add_tcase(s, tc_misc); 527 tcase_add_checked_fixture(tc_misc, NULL, basic_teardown); 528 529 tcase_add_test(tc_misc, test_misc_alloc_create_parser); 530 tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding); 531 tcase_add_test(tc_misc, test_misc_null_parser); 532 tcase_add_test(tc_misc, test_misc_error_string); 533 tcase_add_test(tc_misc, test_misc_version); 534 tcase_add_test(tc_misc, test_misc_features); 535 tcase_add_test(tc_misc, test_misc_attribute_leak); 536 tcase_add_test(tc_misc, test_misc_utf16le); 537 tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1); 538 tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2); 539 tcase_add_test__ifdef_xml_dtd( 540 tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317); 541 tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak); 542 tcase_add_test(tc_misc, 543 test_misc_create_external_entity_parser_with_null_context); 544 tcase_add_test(tc_misc, test_misc_general_entities_support); 545 tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak); 546 tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing); 547 tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser); 548 } 549