1 /* Tests in the "miscellaneous" test case for the Expat test suite 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 15 Copyright (c) 2017 Joe Orton <jorton@redhat.com> 16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com> 20 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 21 Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> 22 Licensed under the MIT license: 23 24 Permission is hereby granted, free of charge, to any person obtaining 25 a copy of this software and associated documentation files (the 26 "Software"), to deal in the Software without restriction, including 27 without limitation the rights to use, copy, modify, merge, publish, 28 distribute, sublicense, and/or sell copies of the Software, and to permit 29 persons to whom the Software is furnished to do so, subject to the 30 following conditions: 31 32 The above copyright notice and this permission notice shall be included 33 in all copies or substantial portions of the Software. 34 35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 41 USE OR OTHER DEALINGS IN THE SOFTWARE. 42 */ 43 44 #if defined(NDEBUG) 45 # undef NDEBUG /* because test suite relies on assert(...) at the moment */ 46 #endif 47 48 #include <assert.h> 49 #include <string.h> 50 51 #include "expat_config.h" 52 53 #include "expat.h" 54 #include "internal.h" 55 #include "minicheck.h" 56 #include "memcheck.h" 57 #include "common.h" 58 #include "ascii.h" /* for ASCII_xxx */ 59 #include "handlers.h" 60 #include "misc_tests.h" 61 62 /* Test that a failure to allocate the parser structure fails gracefully */ 63 START_TEST(test_misc_alloc_create_parser) { 64 XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free}; 65 unsigned int i; 66 const unsigned int max_alloc_count = 10; 67 68 /* Something this simple shouldn't need more than 10 allocations */ 69 for (i = 0; i < max_alloc_count; i++) { 70 g_allocation_count = i; 71 g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL); 72 if (g_parser != NULL) 73 break; 74 } 75 if (i == 0) 76 fail("Parser unexpectedly ignored failing allocator"); 77 else if (i == max_alloc_count) 78 fail("Parser not created with max allocation count"); 79 } 80 END_TEST 81 82 /* Test memory allocation failures for a parser with an encoding */ 83 START_TEST(test_misc_alloc_create_parser_with_encoding) { 84 XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free}; 85 unsigned int i; 86 const unsigned int max_alloc_count = 10; 87 88 /* Try several levels of allocation */ 89 for (i = 0; i < max_alloc_count; i++) { 90 g_allocation_count = i; 91 g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL); 92 if (g_parser != NULL) 93 break; 94 } 95 if (i == 0) 96 fail("Parser ignored failing allocator"); 97 else if (i == max_alloc_count) 98 fail("Parser not created with max allocation count"); 99 } 100 END_TEST 101 102 /* Test that freeing a NULL parser doesn't cause an explosion. 103 * (Not actually tested anywhere else) 104 */ 105 START_TEST(test_misc_null_parser) { 106 XML_ParserFree(NULL); 107 } 108 END_TEST 109 110 #if defined(__has_feature) 111 # if __has_feature(undefined_behavior_sanitizer) 112 # define EXPAT_TESTS_UBSAN 1 113 # else 114 # define EXPAT_TESTS_UBSAN 0 115 # endif 116 #else 117 # define EXPAT_TESTS_UBSAN 0 118 #endif 119 120 /* Test that XML_ErrorString rejects out-of-range codes */ 121 START_TEST(test_misc_error_string) { 122 #if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan 123 union { 124 enum XML_Error xml_error; 125 int integer; 126 } trickery; 127 128 assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test 129 130 trickery.integer = -1; 131 if (XML_ErrorString(trickery.xml_error) != NULL) 132 fail("Negative error code not rejected"); 133 134 trickery.integer = 100; 135 if (XML_ErrorString(trickery.xml_error) != NULL) 136 fail("Large error code not rejected"); 137 #endif 138 } 139 END_TEST 140 141 /* Test the version information is consistent */ 142 143 /* Since we are working in XML_LChars (potentially 16-bits), we 144 * can't use the standard C library functions for character 145 * manipulation and have to roll our own. 146 */ 147 static int 148 parse_version(const XML_LChar *version_text, 149 XML_Expat_Version *version_struct) { 150 if (! version_text) 151 return XML_FALSE; 152 153 while (*version_text != 0x00) { 154 if (*version_text >= ASCII_0 && *version_text <= ASCII_9) 155 break; 156 version_text++; 157 } 158 if (*version_text == 0x00) 159 return XML_FALSE; 160 161 /* version_struct->major = strtoul(version_text, 10, &version_text) */ 162 version_struct->major = 0; 163 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { 164 version_struct->major 165 = 10 * version_struct->major + (*version_text++ - ASCII_0); 166 } 167 if (*version_text++ != ASCII_PERIOD) 168 return XML_FALSE; 169 170 /* Now for the minor version number */ 171 version_struct->minor = 0; 172 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { 173 version_struct->minor 174 = 10 * version_struct->minor + (*version_text++ - ASCII_0); 175 } 176 if (*version_text++ != ASCII_PERIOD) 177 return XML_FALSE; 178 179 /* Finally the micro version number */ 180 version_struct->micro = 0; 181 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { 182 version_struct->micro 183 = 10 * version_struct->micro + (*version_text++ - ASCII_0); 184 } 185 if (*version_text != 0x00) 186 return XML_FALSE; 187 return XML_TRUE; 188 } 189 190 static int 191 versions_equal(const XML_Expat_Version *first, 192 const XML_Expat_Version *second) { 193 return (first->major == second->major && first->minor == second->minor 194 && first->micro == second->micro); 195 } 196 197 START_TEST(test_misc_version) { 198 XML_Expat_Version read_version = XML_ExpatVersionInfo(); 199 /* Silence compiler warning with the following assignment */ 200 XML_Expat_Version parsed_version = {0, 0, 0}; 201 const XML_LChar *version_text = XML_ExpatVersion(); 202 203 if (version_text == NULL) 204 fail("Could not obtain version text"); 205 assert(version_text != NULL); 206 if (! parse_version(version_text, &parsed_version)) 207 fail("Unable to parse version text"); 208 if (! versions_equal(&read_version, &parsed_version)) 209 fail("Version mismatch"); 210 211 if (xcstrcmp(version_text, XCS("expat_2.6.3"))) /* needs bump on releases */ 212 fail("XML_*_VERSION in expat.h out of sync?\n"); 213 } 214 END_TEST 215 216 /* Test feature information */ 217 START_TEST(test_misc_features) { 218 const XML_Feature *features = XML_GetFeatureList(); 219 220 /* Prevent problems with double-freeing parsers */ 221 g_parser = NULL; 222 if (features == NULL) { 223 fail("Failed to get feature information"); 224 } else { 225 /* Loop through the features checking what we can */ 226 while (features->feature != XML_FEATURE_END) { 227 switch (features->feature) { 228 case XML_FEATURE_SIZEOF_XML_CHAR: 229 if (features->value != sizeof(XML_Char)) 230 fail("Incorrect size of XML_Char"); 231 break; 232 case XML_FEATURE_SIZEOF_XML_LCHAR: 233 if (features->value != sizeof(XML_LChar)) 234 fail("Incorrect size of XML_LChar"); 235 break; 236 default: 237 break; 238 } 239 features++; 240 } 241 } 242 } 243 END_TEST 244 245 /* Regression test for GitHub Issue #17: memory leak parsing attribute 246 * values with mixed bound and unbound namespaces. 247 */ 248 START_TEST(test_misc_attribute_leak) { 249 const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>"; 250 XML_Memory_Handling_Suite memsuite 251 = {tracking_malloc, tracking_realloc, tracking_free}; 252 253 g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n")); 254 expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found"); 255 XML_ParserFree(g_parser); 256 /* Prevent the teardown trying to double free */ 257 g_parser = NULL; 258 259 if (! tracking_report()) 260 fail("Memory leak found"); 261 } 262 END_TEST 263 264 /* Test parser created for UTF-16LE is successful */ 265 START_TEST(test_misc_utf16le) { 266 const char text[] = 267 /* <?xml version='1.0'?><q>Hi</q> */ 268 "<\0?\0x\0m\0l\0 \0" 269 "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0" 270 "<\0q\0>\0H\0i\0<\0/\0q\0>\0"; 271 const XML_Char *expected = XCS("Hi"); 272 CharData storage; 273 274 g_parser = XML_ParserCreate(XCS("UTF-16LE")); 275 if (g_parser == NULL) 276 fail("Parser not created"); 277 278 CharData_Init(&storage); 279 XML_SetUserData(g_parser, &storage); 280 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 281 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 282 == XML_STATUS_ERROR) 283 xml_failure(g_parser); 284 CharData_CheckXMLChars(&storage, expected); 285 } 286 END_TEST 287 288 START_TEST(test_misc_stop_during_end_handler_issue_240_1) { 289 XML_Parser parser; 290 DataIssue240 *mydata; 291 enum XML_Status result; 292 const char *const doc1 = "<doc><e1/><e><foo/></e></doc>"; 293 294 parser = XML_ParserCreate(NULL); 295 XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240); 296 mydata = (DataIssue240 *)malloc(sizeof(DataIssue240)); 297 mydata->parser = parser; 298 mydata->deep = 0; 299 XML_SetUserData(parser, mydata); 300 301 result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1); 302 XML_ParserFree(parser); 303 free(mydata); 304 if (result != XML_STATUS_ERROR) 305 fail("Stopping the parser did not work as expected"); 306 } 307 END_TEST 308 309 START_TEST(test_misc_stop_during_end_handler_issue_240_2) { 310 XML_Parser parser; 311 DataIssue240 *mydata; 312 enum XML_Status result; 313 const char *const doc2 = "<doc><elem/></doc>"; 314 315 parser = XML_ParserCreate(NULL); 316 XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240); 317 mydata = (DataIssue240 *)malloc(sizeof(DataIssue240)); 318 mydata->parser = parser; 319 mydata->deep = 0; 320 XML_SetUserData(parser, mydata); 321 322 result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1); 323 XML_ParserFree(parser); 324 free(mydata); 325 if (result != XML_STATUS_ERROR) 326 fail("Stopping the parser did not work as expected"); 327 } 328 END_TEST 329 330 START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) { 331 const char *const inputOne = "<!DOCTYPE d [\n" 332 "<!ENTITY % e ']><d/>'>\n" 333 "\n" 334 "%e;"; 335 const char *const inputTwo = "<!DOCTYPE d [\n" 336 "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '&e1;'>\n" 337 "\n" 338 "%e2;"; 339 const char *const inputThree = "<!DOCTYPE d [\n" 340 "<!ENTITY % e ']><d'>\n" 341 "\n" 342 "%e;"; 343 const char *const inputIssue317 = "<!DOCTYPE doc [\n" 344 "<!ENTITY % foo ']>\n" 345 "<doc>Hell<oc (#PCDATA)*>'>\n" 346 "%foo;\n" 347 "]>\n" 348 "<doc>Hello, world</dVc>"; 349 350 const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317}; 351 size_t inputIndex = 0; 352 353 for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) { 354 set_subtest("%s", inputs[inputIndex]); 355 XML_Parser parser; 356 enum XML_Status parseResult; 357 int setParamEntityResult; 358 XML_Size lineNumber; 359 XML_Size columnNumber; 360 const char *const input = inputs[inputIndex]; 361 362 parser = XML_ParserCreate(NULL); 363 setParamEntityResult 364 = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 365 if (setParamEntityResult != 1) 366 fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS."); 367 368 parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0); 369 if (parseResult != XML_STATUS_ERROR) { 370 parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1); 371 if (parseResult != XML_STATUS_ERROR) { 372 fail("Parsing was expected to fail but succeeded."); 373 } 374 } 375 376 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) 377 fail("Error code does not match XML_ERROR_INVALID_TOKEN"); 378 379 lineNumber = XML_GetCurrentLineNumber(parser); 380 if (lineNumber != 4) 381 fail("XML_GetCurrentLineNumber does not work as expected."); 382 383 columnNumber = XML_GetCurrentColumnNumber(parser); 384 if (columnNumber != 0) 385 fail("XML_GetCurrentColumnNumber does not work as expected."); 386 387 XML_ParserFree(parser); 388 } 389 } 390 END_TEST 391 392 START_TEST(test_misc_tag_mismatch_reset_leak) { 393 #ifdef XML_NS 394 const char *const text = "<open xmlns='https://namespace1.test'></close>"; 395 XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n')); 396 397 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 398 != XML_STATUS_ERROR) 399 fail("Call to parse was expected to fail"); 400 if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH) 401 fail("Call to parse was expected to fail from a closing tag mismatch"); 402 403 XML_ParserReset(parser, NULL); 404 405 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 406 != XML_STATUS_ERROR) 407 fail("Call to parse was expected to fail"); 408 if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH) 409 fail("Call to parse was expected to fail from a closing tag mismatch"); 410 411 XML_ParserFree(parser); 412 #endif 413 } 414 END_TEST 415 416 START_TEST(test_misc_create_external_entity_parser_with_null_context) { 417 // With XML_DTD undefined, the only supported case of external entities 418 // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context 419 // was causing a segfault through a null pointer dereference in function 420 // setContext, previously. 421 XML_Parser parser = XML_ParserCreate(NULL); 422 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 423 #ifdef XML_DTD 424 assert_true(ext_parser != NULL); 425 XML_ParserFree(ext_parser); 426 #else 427 assert_true(ext_parser == NULL); 428 #endif /* XML_DTD */ 429 XML_ParserFree(parser); 430 } 431 END_TEST 432 433 START_TEST(test_misc_general_entities_support) { 434 const char *const doc 435 = "<!DOCTYPE r [\n" 436 "<!ENTITY e1 'v1'>\n" 437 "<!ENTITY e2 SYSTEM 'v2'>\n" 438 "]>\n" 439 "<r a1='[&e1;]'>[&e1;][&e2;][&'><"]</r>"; 440 441 CharData storage; 442 CharData_Init(&storage); 443 444 XML_Parser parser = XML_ParserCreate(NULL); 445 XML_SetUserData(parser, &storage); 446 XML_SetStartElementHandler(parser, accumulate_start_element); 447 XML_SetExternalEntityRefHandler(parser, 448 external_entity_failer__if_not_xml_ge); 449 XML_SetEntityDeclHandler(parser, accumulate_entity_decl); 450 XML_SetCharacterDataHandler(parser, accumulate_char_data); 451 452 if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE) 453 != XML_STATUS_OK) { 454 xml_failure(parser); 455 } 456 457 XML_ParserFree(parser); 458 459 CharData_CheckXMLChars(&storage, 460 /* clang-format off */ 461 #if XML_GE == 1 462 XCS("e1=v1\n") 463 XCS("e2=(null)\n") 464 XCS("(r(a1=[v1]))\n") 465 XCS("[v1][][&'><\"]") 466 #else 467 XCS("e1=&e1;\n") 468 XCS("e2=(null)\n") 469 XCS("(r(a1=[&e1;]))\n") 470 XCS("[&e1;][&e2;][&'><\"]") 471 #endif 472 ); 473 /* clang-format on */ 474 } 475 END_TEST 476 477 static void XMLCALL 478 resumable_stopping_character_handler(void *userData, const XML_Char *s, 479 int len) { 480 UNUSED_P(s); 481 UNUSED_P(len); 482 XML_Parser parser = (XML_Parser)userData; 483 XML_StopParser(parser, XML_TRUE); 484 } 485 486 // NOTE: This test needs active LeakSanitizer to be of actual use 487 START_TEST(test_misc_char_handler_stop_without_leak) { 488 const char *const data 489 = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;"; 490 XML_Parser parser = XML_ParserCreate(NULL); 491 assert_true(parser != NULL); 492 XML_SetUserData(parser, parser); 493 XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler); 494 _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE); 495 XML_ParserFree(parser); 496 } 497 END_TEST 498 499 void 500 make_miscellaneous_test_case(Suite *s) { 501 TCase *tc_misc = tcase_create("miscellaneous tests"); 502 503 suite_add_tcase(s, tc_misc); 504 tcase_add_checked_fixture(tc_misc, NULL, basic_teardown); 505 506 tcase_add_test(tc_misc, test_misc_alloc_create_parser); 507 tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding); 508 tcase_add_test(tc_misc, test_misc_null_parser); 509 tcase_add_test(tc_misc, test_misc_error_string); 510 tcase_add_test(tc_misc, test_misc_version); 511 tcase_add_test(tc_misc, test_misc_features); 512 tcase_add_test(tc_misc, test_misc_attribute_leak); 513 tcase_add_test(tc_misc, test_misc_utf16le); 514 tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1); 515 tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2); 516 tcase_add_test__ifdef_xml_dtd( 517 tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317); 518 tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak); 519 tcase_add_test(tc_misc, 520 test_misc_create_external_entity_parser_with_null_context); 521 tcase_add_test(tc_misc, test_misc_general_entities_support); 522 tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak); 523 } 524