/* Tests in the "basic" test case for the Expat test suite
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2017 Joe Orton <jorton@redhat.com>
   Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
   Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
   Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
   Copyright (c) 2021 Donghee Na <donghee.na@python.org>
   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
   Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
   Copyright (c) 2026 Francesco Bertolaccini
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN 40 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 41 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 42 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 43 USE OR OTHER DEALINGS IN THE SOFTWARE. 44 */ 45 46 #if defined(NDEBUG) 47 # undef NDEBUG /* because test suite relies on assert(...) at the moment */ 48 #endif 49 50 #include <assert.h> 51 52 #include <stdio.h> 53 #include <string.h> 54 #include <time.h> 55 56 #if ! defined(__cplusplus) 57 # include <stdbool.h> 58 #endif 59 60 #include "expat_config.h" 61 62 #include "expat.h" 63 #include "internal.h" 64 #include "minicheck.h" 65 #include "structdata.h" 66 #include "common.h" 67 #include "dummy.h" 68 #include "handlers.h" 69 #include "siphash.h" 70 #include "basic_tests.h" 71 72 static void 73 basic_setup(void) { 74 g_parser = XML_ParserCreate(NULL); 75 if (g_parser == NULL) 76 fail("Parser not created."); 77 } 78 79 /* 80 * Character & encoding tests. 81 */ 82 83 START_TEST(test_nul_byte) { 84 char text[] = "<doc>\0</doc>"; 85 86 /* test that a NUL byte (in US-ASCII data) is an error */ 87 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 88 == XML_STATUS_OK) 89 fail("Parser did not report error on NUL-byte."); 90 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 91 xml_failure(g_parser); 92 } 93 END_TEST 94 95 START_TEST(test_u0000_char) { 96 /* test that a NUL byte (in US-ASCII data) is an error */ 97 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF, 98 "Parser did not report error on NUL-byte."); 99 } 100 END_TEST 101 102 START_TEST(test_siphash_self) { 103 if (! 
sip24_valid()) 104 fail("SipHash self-test failed"); 105 } 106 END_TEST 107 108 START_TEST(test_siphash_spec) { 109 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */ 110 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 111 "\x0a\x0b\x0c\x0d\x0e"; 112 const size_t len = sizeof(message) - 1; 113 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U); 114 struct siphash state; 115 struct sipkey key; 116 117 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 118 "\x0a\x0b\x0c\x0d\x0e\x0f"); 119 sip24_init(&state, &key); 120 121 /* Cover spread across calls */ 122 sip24_update(&state, message, 4); 123 sip24_update(&state, message + 4, len - 4); 124 125 /* Cover null length */ 126 sip24_update(&state, message, 0); 127 128 if (sip24_final(&state) != expected) 129 fail("sip24_final failed spec test\n"); 130 131 /* Cover wrapper */ 132 if (siphash24(message, len, &key) != expected) 133 fail("siphash24 failed spec test\n"); 134 } 135 END_TEST 136 137 START_TEST(test_bom_utf8) { 138 /* This test is really just making sure we don't core on a UTF-8 BOM. */ 139 const char *text = "\357\273\277<e/>"; 140 141 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 142 == XML_STATUS_ERROR) 143 xml_failure(g_parser); 144 } 145 END_TEST 146 147 START_TEST(test_bom_utf16_be) { 148 char text[] = "\376\377\0<\0e\0/\0>"; 149 150 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 151 == XML_STATUS_ERROR) 152 xml_failure(g_parser); 153 } 154 END_TEST 155 156 START_TEST(test_bom_utf16_le) { 157 char text[] = "\377\376<\0e\0/\0>\0"; 158 159 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 160 == XML_STATUS_ERROR) 161 xml_failure(g_parser); 162 } 163 END_TEST 164 165 START_TEST(test_nobom_utf16_le) { 166 char text[] = " \0<\0e\0/\0>\0"; 167 168 if (g_chunkSize == 1) { 169 // TODO: with just the first byte, we can't tell the difference between 170 // UTF-16-LE and UTF-8. 
Avoid the failure for now. 171 return; 172 } 173 174 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 175 == XML_STATUS_ERROR) 176 xml_failure(g_parser); 177 } 178 END_TEST 179 180 START_TEST(test_hash_collision) { 181 /* For full coverage of the lookup routine, we need to ensure a 182 * hash collision even though we can only tell that we have one 183 * through breakpoint debugging or coverage statistics. The 184 * following will cause a hash collision on machines with a 64-bit 185 * long type; others will have to experiment. The full coverage 186 * tests invoked from qa.sh usually provide a hash collision, but 187 * not always. This is an attempt to provide insurance. 188 */ 189 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U) 190 const char *text 191 = "<doc>\n" 192 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n" 193 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n" 194 "<b5></b5><b6></b6><b7></b7><b8></b8>\n" 195 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n" 196 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n" 197 "<d8>This triggers the table growth and collides with b2</d8>\n" 198 "</doc>\n"; 199 200 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT); 201 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 202 == XML_STATUS_ERROR) 203 xml_failure(g_parser); 204 } 205 END_TEST 206 #undef COLLIDING_HASH_SALT 207 208 START_TEST(test_hash_salt_setter) { 209 const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7', 210 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; 211 XML_Parser parser = XML_ParserCreate(NULL); 212 213 // NULL parser should be rejected 214 assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE); 215 216 // NULL entropy should be rejected 217 assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE); 218 219 // Setting should be allowed more than once 220 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE); 221 
assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE); 222 223 // But not after parsing has started 224 assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */) 225 == XML_STATUS_OK); 226 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE); 227 228 XML_ParserFree(parser); 229 } 230 END_TEST 231 232 /* Regression test for SF bug #491986. */ 233 START_TEST(test_danish_latin1) { 234 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 235 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>"; 236 #ifdef XML_UNICODE 237 const XML_Char *expected 238 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5"); 239 #else 240 const XML_Char *expected 241 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); 242 #endif 243 run_character_check(text, expected); 244 } 245 END_TEST 246 247 /* Regression test for SF bug #514281. */ 248 START_TEST(test_french_charref_hexidecimal) { 249 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 250 "<doc>éèàçêÈ</doc>"; 251 #ifdef XML_UNICODE 252 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 253 #else 254 const XML_Char *expected 255 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 256 #endif 257 run_character_check(text, expected); 258 } 259 END_TEST 260 261 START_TEST(test_french_charref_decimal) { 262 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 263 "<doc>éèàçêÈ</doc>"; 264 #ifdef XML_UNICODE 265 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 266 #else 267 const XML_Char *expected 268 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 269 #endif 270 run_character_check(text, expected); 271 } 272 END_TEST 273 274 START_TEST(test_french_latin1) { 275 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 276 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>"; 277 #ifdef XML_UNICODE 278 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 279 #else 280 const XML_Char 
*expected 281 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 282 #endif 283 run_character_check(text, expected); 284 } 285 END_TEST 286 287 START_TEST(test_french_utf8) { 288 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 289 "<doc>\xC3\xA9</doc>"; 290 #ifdef XML_UNICODE 291 const XML_Char *expected = XCS("\x00e9"); 292 #else 293 const XML_Char *expected = XCS("\xC3\xA9"); 294 #endif 295 run_character_check(text, expected); 296 } 297 END_TEST 298 299 /* Regression test for SF bug #600479. 300 XXX There should be a test that exercises all legal XML Unicode 301 characters as PCDATA and attribute value content, and XML Name 302 characters as part of element and attribute names. 303 */ 304 START_TEST(test_utf8_false_rejection) { 305 const char *text = "<doc>\xEF\xBA\xBF</doc>"; 306 #ifdef XML_UNICODE 307 const XML_Char *expected = XCS("\xfebf"); 308 #else 309 const XML_Char *expected = XCS("\xEF\xBA\xBF"); 310 #endif 311 run_character_check(text, expected); 312 } 313 END_TEST 314 315 /* Regression test for SF bug #477667. 316 This test assures that any 8-bit character followed by a 7-bit 317 character will not be mistakenly interpreted as a valid UTF-8 318 sequence. 319 */ 320 START_TEST(test_illegal_utf8) { 321 char text[100]; 322 int i; 323 324 for (i = 128; i <= 255; ++i) { 325 snprintf(text, sizeof(text), "<e>%ccd</e>", i); 326 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 327 == XML_STATUS_OK) { 328 snprintf(text, sizeof(text), 329 "expected token error for '%c' (ordinal %d) in UTF-8 text", i, 330 i); 331 fail(text); 332 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 333 xml_failure(g_parser); 334 /* Reset the parser since we use the same parser repeatedly. 
*/ 335 XML_ParserReset(g_parser, NULL); 336 } 337 } 338 END_TEST 339 340 /* Examples, not masks: */ 341 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ 342 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ 343 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */ 344 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ 345 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */ 346 347 START_TEST(test_utf8_auto_align) { 348 struct TestCase { 349 ptrdiff_t expectedMovementInChars; 350 const char *input; 351 }; 352 353 struct TestCase cases[] = { 354 {00, ""}, 355 356 {00, UTF8_LEAD_1}, 357 358 {-1, UTF8_LEAD_2}, 359 {00, UTF8_LEAD_2 UTF8_FOLLOW}, 360 361 {-1, UTF8_LEAD_3}, 362 {-2, UTF8_LEAD_3 UTF8_FOLLOW}, 363 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, 364 365 {-1, UTF8_LEAD_4}, 366 {-2, UTF8_LEAD_4 UTF8_FOLLOW}, 367 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, 368 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, 369 }; 370 371 size_t i = 0; 372 bool success = true; 373 for (; i < sizeof(cases) / sizeof(*cases); i++) { 374 const char *fromLim = cases[i].input + strlen(cases[i].input); 375 const char *const fromLimInitially = fromLim; 376 ptrdiff_t actualMovementInChars; 377 378 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim); 379 380 actualMovementInChars = (fromLim - fromLimInitially); 381 if (actualMovementInChars != cases[i].expectedMovementInChars) { 382 size_t j = 0; 383 success = false; 384 printf("[-] UTF-8 case %2u: Expected movement by %2d chars" 385 ", actually moved by %2d chars: \"", 386 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars, 387 (int)actualMovementInChars); 388 for (; j < strlen(cases[i].input); j++) { 389 printf("\\x%02x", (unsigned char)cases[i].input[j]); 390 } 391 printf("\"\n"); 392 } 393 } 394 395 if (! 
success) { 396 fail("UTF-8 auto-alignment is not bullet-proof\n"); 397 } 398 } 399 END_TEST 400 401 START_TEST(test_utf16) { 402 /* <?xml version="1.0" encoding="UTF-16"?> 403 * <doc a='123'>some {A} text</doc> 404 * 405 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A 406 */ 407 char text[] 408 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" 409 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" 410 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" 411 "\000'\000?\000>\000\n" 412 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" 413 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" 414 "<\000/\000d\000o\000c\000>"; 415 #ifdef XML_UNICODE 416 const XML_Char *expected = XCS("some \xff21 text"); 417 #else 418 const XML_Char *expected = XCS("some \357\274\241 text"); 419 #endif 420 CharData storage; 421 422 CharData_Init(&storage); 423 XML_SetUserData(g_parser, &storage); 424 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 425 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 426 == XML_STATUS_ERROR) 427 xml_failure(g_parser); 428 CharData_CheckXMLChars(&storage, expected); 429 } 430 END_TEST 431 432 START_TEST(test_utf16_le_epilog_newline) { 433 unsigned int first_chunk_bytes = 17; 434 char text[] = "\xFF\xFE" /* BOM */ 435 "<\000e\000/\000>\000" /* document element */ 436 "\r\000\n\000\r\000\n\000"; /* epilog */ 437 438 if (first_chunk_bytes >= sizeof(text) - 1) 439 fail("bad value of first_chunk_bytes"); 440 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE) 441 == XML_STATUS_ERROR) 442 xml_failure(g_parser); 443 else { 444 enum XML_Status rc; 445 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes, 446 (int)(sizeof(text) - first_chunk_bytes - 1), 447 XML_TRUE); 448 if (rc == XML_STATUS_ERROR) 449 xml_failure(g_parser); 450 } 451 } 452 END_TEST 453 454 /* Test that an outright lie in the encoding is 
faulted */ 455 START_TEST(test_not_utf16) { 456 const char *text = "<?xml version='1.0' encoding='utf-16'?>" 457 "<doc>Hi</doc>"; 458 459 /* Use a handler to provoke the appropriate code paths */ 460 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler); 461 expect_failure(text, XML_ERROR_INCORRECT_ENCODING, 462 "UTF-16 declared in UTF-8 not faulted"); 463 } 464 END_TEST 465 466 /* Test that an unknown encoding is rejected */ 467 START_TEST(test_bad_encoding) { 468 const char *text = "<doc>Hi</doc>"; 469 470 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding"))) 471 fail("XML_SetEncoding failed"); 472 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 473 "Unknown encoding not faulted"); 474 } 475 END_TEST 476 477 /* Regression test for SF bug #481609, #774028. */ 478 START_TEST(test_latin1_umlauts) { 479 const char *text 480 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 481 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n" 482 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>"; 483 #ifdef XML_UNICODE 484 /* Expected results in UTF-16 */ 485 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ") 486 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >"); 487 #else 488 /* Expected results in UTF-8 */ 489 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") 490 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >"); 491 #endif 492 493 run_character_check(text, expected); 494 XML_ParserReset(g_parser, NULL); 495 run_attribute_check(text, expected); 496 /* Repeat with a default handler */ 497 XML_ParserReset(g_parser, NULL); 498 XML_SetDefaultHandler(g_parser, dummy_default_handler); 499 run_character_check(text, expected); 500 XML_ParserReset(g_parser, NULL); 501 XML_SetDefaultHandler(g_parser, dummy_default_handler); 502 run_attribute_check(text, expected); 503 } 504 END_TEST 505 506 /* Test that an element name with a 4-byte UTF-8 character is rejected */ 507 START_TEST(test_long_utf8_character) { 508 const char *text 509 = "<?xml version='1.0' 
encoding='utf-8'?>\n" 510 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ 511 "<do\xf0\x90\x80\x80/>"; 512 expect_failure(text, XML_ERROR_INVALID_TOKEN, 513 "4-byte UTF-8 character in element name not faulted"); 514 } 515 END_TEST 516 517 /* Test that a long latin-1 attribute (too long to convert in one go) 518 * is correctly converted 519 */ 520 START_TEST(test_long_latin1_attribute) { 521 const char *text 522 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 523 "<doc att='" 524 /* 64 characters per line */ 525 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 526 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 527 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 528 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 529 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 530 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 531 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 532 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 533 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 534 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 535 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 536 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 537 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 538 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 539 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 540 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" 541 /* Last character splits across a buffer boundary */ 542 "\xe4'>\n</doc>"; 543 544 const XML_Char *expected = 545 /* 64 characters per line */ 546 /* clang-format off */ 547 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 548 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 549 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 550 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 551 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 552 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 553 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 554 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 555 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 556 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 557 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 558 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 559 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 560 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 561 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 562 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO") 563 /* clang-format on */ 564 #ifdef XML_UNICODE 565 XCS("\x00e4"); 566 #else 567 XCS("\xc3\xa4"); 568 #endif 569 570 run_attribute_check(text, expected); 571 } 572 END_TEST 573 574 /* Test that a long ASCII attribute (too long to convert in one go) 575 * is correctly converted 576 */ 577 START_TEST(test_long_ascii_attribute) { 578 const char *text 579 = "<?xml version='1.0' encoding='us-ascii'?>\n" 580 "<doc att='" 581 /* 64 characters per line */ 582 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 583 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 584 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 585 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 586 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 587 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 588 
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 589 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 590 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 591 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 592 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 593 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 594 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 595 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 596 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 597 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 598 "01234'>\n</doc>"; 599 const XML_Char *expected = 600 /* 64 characters per line */ 601 /* clang-format off */ 602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 604 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 605 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 606 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 607 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 608 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 609 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 610 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 611 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 612 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 613 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 614 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 615 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 616 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 617 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 618 XCS("01234"); 619 /* clang-format on */ 620 621 run_attribute_check(text, expected); 622 } 623 END_TEST 624 625 /* Regression test #1 for SF bug #653180. */ 626 START_TEST(test_line_number_after_parse) { 627 const char *text = "<tag>\n" 628 "\n" 629 "\n</tag>"; 630 XML_Size lineno; 631 632 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 633 == XML_STATUS_ERROR) 634 xml_failure(g_parser); 635 lineno = XML_GetCurrentLineNumber(g_parser); 636 if (lineno != 4) { 637 char buffer[100]; 638 snprintf(buffer, sizeof(buffer), 639 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); 640 fail(buffer); 641 } 642 } 643 END_TEST 644 645 /* Regression test #2 for SF bug #653180. */ 646 START_TEST(test_column_number_after_parse) { 647 const char *text = "<tag></tag>"; 648 XML_Size colno; 649 650 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 651 == XML_STATUS_ERROR) 652 xml_failure(g_parser); 653 colno = XML_GetCurrentColumnNumber(g_parser); 654 if (colno != 11) { 655 char buffer[100]; 656 snprintf(buffer, sizeof(buffer), 657 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); 658 fail(buffer); 659 } 660 } 661 END_TEST 662 663 /* Regression test #3 for SF bug #653180. 
*/ 664 START_TEST(test_line_and_column_numbers_inside_handlers) { 665 const char *text = "<a>\n" /* Unix end-of-line */ 666 " <b>\r\n" /* Windows end-of-line */ 667 " <c/>\r" /* Mac OS end-of-line */ 668 " </b>\n" 669 " <d>\n" 670 " <f/>\n" 671 " </d>\n" 672 "</a>"; 673 const StructDataEntry expected[] 674 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG}, 675 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG}, 676 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG}, 677 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG}, 678 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}}; 679 const int expected_count = sizeof(expected) / sizeof(StructDataEntry); 680 StructData storage; 681 682 StructData_Init(&storage); 683 XML_SetUserData(g_parser, &storage); 684 XML_SetStartElementHandler(g_parser, start_element_event_handler2); 685 XML_SetEndElementHandler(g_parser, end_element_event_handler2); 686 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 687 == XML_STATUS_ERROR) 688 xml_failure(g_parser); 689 690 StructData_CheckItems(&storage, expected, expected_count); 691 StructData_Dispose(&storage); 692 } 693 END_TEST 694 695 /* Regression test #4 for SF bug #653180. */ 696 START_TEST(test_line_number_after_error) { 697 const char *text = "<a>\n" 698 " <b>\n" 699 " </a>"; /* missing </b> */ 700 XML_Size lineno; 701 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 702 != XML_STATUS_ERROR) 703 fail("Expected a parse error"); 704 705 lineno = XML_GetCurrentLineNumber(g_parser); 706 if (lineno != 3) { 707 char buffer[100]; 708 snprintf(buffer, sizeof(buffer), 709 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); 710 fail(buffer); 711 } 712 } 713 END_TEST 714 715 /* Regression test #5 for SF bug #653180. 
*/ 716 START_TEST(test_column_number_after_error) { 717 const char *text = "<a>\n" 718 " <b>\n" 719 " </a>"; /* missing </b> */ 720 XML_Size colno; 721 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 722 != XML_STATUS_ERROR) 723 fail("Expected a parse error"); 724 725 colno = XML_GetCurrentColumnNumber(g_parser); 726 if (colno != 4) { 727 char buffer[100]; 728 snprintf(buffer, sizeof(buffer), 729 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); 730 fail(buffer); 731 } 732 } 733 END_TEST 734 735 /* Regression test for SF bug #478332. */ 736 START_TEST(test_really_long_lines) { 737 /* This parses an input line longer than INIT_DATA_BUF_SIZE 738 characters long (defined to be 1024 in xmlparse.c). We take a 739 really cheesy approach to building the input buffer, because 740 this avoids writing bugs in buffer-filling code. 741 */ 742 const char *text 743 = "<e>" 744 /* 64 chars */ 745 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 746 /* until we have at least 1024 characters on the line: */ 747 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 748 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 749 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 750 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 751 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 752 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 753 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 754 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 760 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 763 "</e>"; 764 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 765 == XML_STATUS_ERROR) 766 xml_failure(g_parser); 767 } 768 END_TEST 769 770 /* Test cdata processing across a buffer boundary */ 771 START_TEST(test_really_long_encoded_lines) { 772 /* As above, except that we want to provoke an output buffer 773 * overflow with a non-trivial encoding. For this we need to pass 774 * the whole cdata in one go, not byte-by-byte. 775 */ 776 void *buffer; 777 const char *text 778 = "<?xml version='1.0' encoding='iso-8859-1'?>" 779 "<e>" 780 /* 64 chars */ 781 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 782 /* until we have at least 1024 characters on the line: */ 783 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 784 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 785 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 786 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 787 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 788 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 789 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 790 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 791 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 792 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 793 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 794 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 795 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 796 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 797 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 798 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 799 "</e>"; 800 int parse_len = (int)strlen(text); 801 802 /* Need a cdata handler to provoke the code path we want to test */ 803 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler); 804 buffer = XML_GetBuffer(g_parser, parse_len); 805 if (buffer == NULL) 806 fail("Could not allocate parse buffer"); 807 assert(buffer != NULL); 808 memcpy(buffer, text, parse_len); 809 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) 810 xml_failure(g_parser); 811 } 812 END_TEST 813 814 /* 815 * Element event tests. 816 */ 817 818 START_TEST(test_end_element_events) { 819 const char *text = "<a><b><c/></b><d><f/></d></a>"; 820 const XML_Char *expected = XCS("/c/b/f/d/a"); 821 CharData storage; 822 823 CharData_Init(&storage); 824 XML_SetUserData(g_parser, &storage); 825 XML_SetEndElementHandler(g_parser, end_element_event_handler); 826 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 827 == XML_STATUS_ERROR) 828 xml_failure(g_parser); 829 CharData_CheckXMLChars(&storage, expected); 830 } 831 END_TEST 832 833 /* 834 * Attribute tests. 835 */ 836 837 /* Helper used by the following tests; this checks any "attr" and "refs" 838 attributes to make sure whitespace has been normalized. 839 840 Return true if whitespace has been normalized in a string, using 841 the rules for attribute value normalization. The 'is_cdata' flag 842 is needed since CDATA attributes don't need to have multiple 843 whitespace characters collapsed to a single space, while other 844 attribute data types do. (Section 3.3.3 of the recommendation.) 845 */ 846 static int 847 is_whitespace_normalized(const XML_Char *s, int is_cdata) { 848 int blanks = 0; 849 int at_start = 1; 850 while (*s) { 851 if (*s == XCS(' ')) 852 ++blanks; 853 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r')) 854 return 0; 855 else { 856 if (at_start) { 857 at_start = 0; 858 if (blanks && ! 
is_cdata) 859 /* illegal leading blanks */ 860 return 0; 861 } else if (blanks > 1 && ! is_cdata) 862 return 0; 863 blanks = 0; 864 } 865 ++s; 866 } 867 if (blanks && ! is_cdata) 868 return 0; 869 return 1; 870 } 871 872 /* Check the attribute whitespace checker: */ 873 START_TEST(test_helper_is_whitespace_normalized) { 874 assert(is_whitespace_normalized(XCS("abc"), 0)); 875 assert(is_whitespace_normalized(XCS("abc"), 1)); 876 assert(is_whitespace_normalized(XCS("abc def ghi"), 0)); 877 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 878 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0)); 879 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1)); 880 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0)); 881 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 882 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0)); 883 assert(is_whitespace_normalized(XCS("abc def ghi "), 1)); 884 assert(! is_whitespace_normalized(XCS(" "), 0)); 885 assert(is_whitespace_normalized(XCS(" "), 1)); 886 assert(! is_whitespace_normalized(XCS("\t"), 0)); 887 assert(! is_whitespace_normalized(XCS("\t"), 1)); 888 assert(! is_whitespace_normalized(XCS("\n"), 0)); 889 assert(! is_whitespace_normalized(XCS("\n"), 1)); 890 assert(! is_whitespace_normalized(XCS("\r"), 0)); 891 assert(! is_whitespace_normalized(XCS("\r"), 1)); 892 assert(! is_whitespace_normalized(XCS("abc\t def"), 1)); 893 } 894 END_TEST 895 896 static void XMLCALL 897 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, 898 const XML_Char **atts) { 899 int i; 900 UNUSED_P(userData); 901 UNUSED_P(name); 902 for (i = 0; atts[i] != NULL; i += 2) { 903 const XML_Char *attrname = atts[i]; 904 const XML_Char *value = atts[i + 1]; 905 if (xcstrcmp(XCS("attr"), attrname) == 0 906 || xcstrcmp(XCS("ents"), attrname) == 0 907 || xcstrcmp(XCS("refs"), attrname) == 0) { 908 if (! 
is_whitespace_normalized(value, 0)) { 909 char buffer[256]; 910 snprintf(buffer, sizeof(buffer), 911 "attribute value not normalized: %" XML_FMT_STR 912 "='%" XML_FMT_STR "'", 913 attrname, value); 914 fail(buffer); 915 } 916 } 917 } 918 } 919 920 START_TEST(test_attr_whitespace_normalization) { 921 const char *text 922 = "<!DOCTYPE doc [\n" 923 " <!ATTLIST doc\n" 924 " attr NMTOKENS #REQUIRED\n" 925 " ents ENTITIES #REQUIRED\n" 926 " refs IDREFS #REQUIRED>\n" 927 "]>\n" 928 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n" 929 " ents=' ent-1 \t\r\n" 930 " ent-2 ' >\n" 931 " <e id='id-1'/>\n" 932 " <e id='id-2'/>\n" 933 "</doc>"; 934 935 XML_SetStartElementHandler(g_parser, 936 check_attr_contains_normalized_whitespace); 937 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 938 == XML_STATUS_ERROR) 939 xml_failure(g_parser); 940 } 941 END_TEST 942 943 /* 944 * XML declaration tests. 945 */ 946 947 START_TEST(test_xmldecl_misplaced) { 948 expect_failure("\n" 949 "<?xml version='1.0'?>\n" 950 "<a/>", 951 XML_ERROR_MISPLACED_XML_PI, 952 "failed to report misplaced XML declaration"); 953 } 954 END_TEST 955 956 START_TEST(test_xmldecl_invalid) { 957 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL, 958 "Failed to report invalid XML declaration"); 959 } 960 END_TEST 961 962 START_TEST(test_xmldecl_missing_attr) { 963 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL, 964 "Failed to report missing XML declaration attribute"); 965 } 966 END_TEST 967 968 START_TEST(test_xmldecl_missing_value) { 969 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n" 970 "<doc/>", 971 XML_ERROR_XML_DECL, 972 "Failed to report missing attribute value"); 973 } 974 END_TEST 975 976 /* Regression test for SF bug #584832. 
*/ 977 START_TEST(test_unknown_encoding_internal_entity) { 978 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 979 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 980 "<test a='&foo;'/>"; 981 982 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL); 983 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 984 == XML_STATUS_ERROR) 985 xml_failure(g_parser); 986 } 987 END_TEST 988 989 /* Test unrecognised encoding handler */ 990 START_TEST(test_unrecognised_encoding_internal_entity) { 991 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 992 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 993 "<test a='&foo;'/>"; 994 995 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL); 996 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 997 != XML_STATUS_ERROR) 998 fail("Unrecognised encoding not rejected"); 999 } 1000 END_TEST 1001 1002 /* Regression test for SF bug #620106. */ 1003 START_TEST(test_ext_entity_set_encoding) { 1004 const char *text = "<!DOCTYPE doc [\n" 1005 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1006 "]>\n" 1007 "<doc>&en;</doc>"; 1008 ExtTest test_data 1009 = {/* This text says it's an unsupported encoding, but it's really 1010 UTF-8, which we tell Expat using XML_SetEncoding(). 
1011 */ 1012 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL}; 1013 #ifdef XML_UNICODE 1014 const XML_Char *expected = XCS("\x00e9"); 1015 #else 1016 const XML_Char *expected = XCS("\xc3\xa9"); 1017 #endif 1018 1019 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1020 run_ext_character_check(text, &test_data, expected); 1021 } 1022 END_TEST 1023 1024 /* Test external entities with no handler */ 1025 START_TEST(test_ext_entity_no_handler) { 1026 const char *text = "<!DOCTYPE doc [\n" 1027 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1028 "]>\n" 1029 "<doc>&en;</doc>"; 1030 1031 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1032 run_character_check(text, XCS("")); 1033 } 1034 END_TEST 1035 1036 /* Test UTF-8 BOM is accepted */ 1037 START_TEST(test_ext_entity_set_bom) { 1038 const char *text = "<!DOCTYPE doc [\n" 1039 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1040 "]>\n" 1041 "<doc>&en;</doc>"; 1042 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */ 1043 "<?xml encoding='iso-8859-3'?>" 1044 "\xC3\xA9", 1045 XCS("utf-8"), NULL}; 1046 #ifdef XML_UNICODE 1047 const XML_Char *expected = XCS("\x00e9"); 1048 #else 1049 const XML_Char *expected = XCS("\xc3\xa9"); 1050 #endif 1051 1052 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1053 run_ext_character_check(text, &test_data, expected); 1054 } 1055 END_TEST 1056 1057 /* Test that bad encodings are faulted */ 1058 START_TEST(test_ext_entity_bad_encoding) { 1059 const char *text = "<!DOCTYPE doc [\n" 1060 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1061 "]>\n" 1062 "<doc>&en;</doc>"; 1063 ExtFaults fault 1064 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted", 1065 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING}; 1066 1067 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1068 XML_SetUserData(g_parser, &fault); 1069 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1070 "Bad encoding should 
not have been accepted"); 1071 } 1072 END_TEST 1073 1074 /* Try handing an invalid encoding to an external entity parser */ 1075 START_TEST(test_ext_entity_bad_encoding_2) { 1076 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1077 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1078 "<doc>&entity;</doc>"; 1079 ExtFaults fault 1080 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted", 1081 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING}; 1082 1083 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1084 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1085 XML_SetUserData(g_parser, &fault); 1086 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1087 "Bad encoding not faulted in external entity handler"); 1088 } 1089 END_TEST 1090 1091 /* Test that no error is reported for unknown entities if we don't 1092 read an external subset. This was fixed in Expat 1.95.5. 1093 */ 1094 START_TEST(test_wfc_undeclared_entity_unread_external_subset) { 1095 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 1096 "<doc>&entity;</doc>"; 1097 1098 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1099 == XML_STATUS_ERROR) 1100 xml_failure(g_parser); 1101 } 1102 END_TEST 1103 1104 /* Test that an error is reported for unknown entities if we don't 1105 have an external subset. 1106 */ 1107 START_TEST(test_wfc_undeclared_entity_no_external_subset) { 1108 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY, 1109 "Parser did not report undefined entity w/out a DTD."); 1110 } 1111 END_TEST 1112 1113 /* Test that an error is reported for unknown entities if we don't 1114 read an external subset, but have been declared standalone. 
1115 */ 1116 START_TEST(test_wfc_undeclared_entity_standalone) { 1117 const char *text 1118 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1119 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1120 "<doc>&entity;</doc>"; 1121 1122 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1123 "Parser did not report undefined entity (standalone)."); 1124 } 1125 END_TEST 1126 1127 /* Test that an error is reported for unknown entities if we have read 1128 an external subset, and standalone is true. 1129 */ 1130 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { 1131 const char *text 1132 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1133 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1134 "<doc>&entity;</doc>"; 1135 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1136 1137 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1138 XML_SetUserData(g_parser, &test_data); 1139 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1140 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1141 "Parser did not report undefined entity (external DTD)."); 1142 } 1143 END_TEST 1144 1145 /* Test that external entity handling is not done if the parsing flag 1146 * is set to UNLESS_STANDALONE 1147 */ 1148 START_TEST(test_entity_with_external_subset_unless_standalone) { 1149 const char *text 1150 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1151 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1152 "<doc>&entity;</doc>"; 1153 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL}; 1154 1155 XML_SetParamEntityParsing(g_parser, 1156 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1157 XML_SetUserData(g_parser, &test_data); 1158 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1159 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1160 "Parser did not report undefined entity"); 1161 } 1162 END_TEST 1163 1164 /* Test that no error is reported for unknown entities if we have read 1165 an external subset, and 
standalone is false. 1166 */ 1167 START_TEST(test_wfc_undeclared_entity_with_external_subset) { 1168 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1169 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1170 "<doc>&entity;</doc>"; 1171 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1172 1173 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1174 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1175 run_ext_character_check(text, &test_data, XCS("")); 1176 } 1177 END_TEST 1178 1179 /* Test that an error is reported if our NotStandalone handler fails */ 1180 START_TEST(test_not_standalone_handler_reject) { 1181 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1182 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1183 "<doc>&entity;</doc>"; 1184 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1185 1186 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1187 XML_SetUserData(g_parser, &test_data); 1188 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1189 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1190 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1191 "NotStandalone handler failed to reject"); 1192 1193 /* Try again but without external entity handling */ 1194 XML_ParserReset(g_parser, NULL); 1195 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1196 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1197 "NotStandalone handler failed to reject"); 1198 } 1199 END_TEST 1200 1201 /* Test that no error is reported if our NotStandalone handler succeeds */ 1202 START_TEST(test_not_standalone_handler_accept) { 1203 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1204 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1205 "<doc>&entity;</doc>"; 1206 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1207 1208 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1209 
XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1210 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1211 run_ext_character_check(text, &test_data, XCS("")); 1212 1213 /* Repeat without the external entity handler */ 1214 XML_ParserReset(g_parser, NULL); 1215 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1216 run_character_check(text, XCS("")); 1217 } 1218 END_TEST 1219 1220 START_TEST(test_entity_start_tag_level_greater_than_one) { 1221 const char *const text = "<!DOCTYPE t1 [\n" 1222 " <!ENTITY e1 'hello'>\n" 1223 "]>\n" 1224 "<t1>\n" 1225 " <t2>&e1;</t2>\n" 1226 "</t1>\n"; 1227 1228 XML_Parser parser = XML_ParserCreate(NULL); 1229 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 1230 /*isFinal*/ XML_TRUE) 1231 == XML_STATUS_OK); 1232 XML_ParserFree(parser); 1233 } 1234 END_TEST 1235 1236 START_TEST(test_wfc_no_recursive_entity_refs) { 1237 const char *text = "<!DOCTYPE doc [\n" 1238 " <!ENTITY entity '&entity;'>\n" 1239 "]>\n" 1240 "<doc>&entity;</doc>"; 1241 1242 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, 1243 "Parser did not report recursive entity reference."); 1244 } 1245 END_TEST 1246 1247 START_TEST(test_no_indirectly_recursive_entity_refs) { 1248 struct TestCase { 1249 const char *doc; 1250 bool usesParameterEntities; 1251 }; 1252 1253 const struct TestCase cases[] = { 1254 // general entity + character data 1255 {"<!DOCTYPE a [\n" 1256 " <!ENTITY e1 '&e2;'>\n" 1257 " <!ENTITY e2 '&e1;'>\n" 1258 "]><a>&e2;</a>\n", 1259 false}, 1260 1261 // general entity + attribute value 1262 {"<!DOCTYPE a [\n" 1263 " <!ENTITY e1 '&e2;'>\n" 1264 " <!ENTITY e2 '&e1;'>\n" 1265 "]><a k1='&e2;' />\n", 1266 false}, 1267 1268 // parameter entity 1269 {"<!DOCTYPE doc [\n" 1270 " <!ENTITY % p1 '%p2;'>\n" 1271 " <!ENTITY % p2 '%p1;'>\n" 1272 " <!ENTITY % define_g \"<!ENTITY g '%p2;'>\">\n" 1273 " %define_g;\n" 1274 "]>\n" 1275 "<doc/>\n", 1276 true}, 1277 }; 1278 const XML_Bool 
reset_or_not[] = {XML_TRUE, XML_FALSE}; 1279 1280 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1281 for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]); 1282 j++) { 1283 const XML_Bool reset_wanted = reset_or_not[j]; 1284 const char *const doc = cases[i].doc; 1285 const bool usesParameterEntities = cases[i].usesParameterEntities; 1286 1287 set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc); 1288 1289 #ifdef XML_DTD // both GE and DTD 1290 const bool rejection_expected = true; 1291 #elif XML_GE == 1 // GE but not DTD 1292 const bool rejection_expected = ! usesParameterEntities; 1293 #else // neither DTD nor GE 1294 const bool rejection_expected = false; 1295 #endif 1296 1297 XML_Parser parser = XML_ParserCreate(NULL); 1298 1299 #ifdef XML_DTD 1300 if (usesParameterEntities) { 1301 assert_true( 1302 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS) 1303 == 1); 1304 } 1305 #else 1306 UNUSED_P(usesParameterEntities); 1307 #endif // XML_DTD 1308 1309 const enum XML_Status status 1310 = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), 1311 /*isFinal*/ XML_TRUE); 1312 1313 if (rejection_expected) { 1314 assert_true(status == XML_STATUS_ERROR); 1315 assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF); 1316 } else { 1317 assert_true(status == XML_STATUS_OK); 1318 } 1319 1320 if (reset_wanted) { 1321 // This covers free'ing of (eventually) all three open entity lists by 1322 // XML_ParserReset. 1323 XML_ParserReset(parser, NULL); 1324 } 1325 1326 // This covers free'ing of (eventually) all three open entity lists by 1327 // XML_ParserFree (unless XML_ParserReset has already done that above). 
1328 XML_ParserFree(parser); 1329 } 1330 } 1331 } 1332 END_TEST 1333 1334 START_TEST(test_recursive_external_parameter_entity_2) { 1335 struct TestCase { 1336 const char *doc; 1337 enum XML_Status expectedStatus; 1338 }; 1339 1340 struct TestCase cases[] = { 1341 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR}, 1342 {"<!ENTITY % p1 '%p1;'>" 1343 "<!ENTITY % p1 'first declaration wins'>", 1344 XML_STATUS_ERROR}, 1345 {"<!ENTITY % p1 'first declaration wins'>" 1346 "<!ENTITY % p1 '%p1;'>", 1347 XML_STATUS_OK}, 1348 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK}, 1349 }; 1350 1351 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1352 const char *const doc = cases[i].doc; 1353 const enum XML_Status expectedStatus = cases[i].expectedStatus; 1354 set_subtest("%s", doc); 1355 1356 XML_Parser parser = XML_ParserCreate(NULL); 1357 assert_true(parser != NULL); 1358 1359 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 1360 assert_true(ext_parser != NULL); 1361 1362 const enum XML_Status actualStatus 1363 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); 1364 1365 assert_true(actualStatus == expectedStatus); 1366 if (actualStatus != XML_STATUS_OK) { 1367 assert_true(XML_GetErrorCode(ext_parser) 1368 == XML_ERROR_RECURSIVE_ENTITY_REF); 1369 } 1370 1371 XML_ParserFree(ext_parser); 1372 XML_ParserFree(parser); 1373 } 1374 } 1375 END_TEST 1376 1377 /* Test incomplete external entities are faulted */ 1378 START_TEST(test_ext_entity_invalid_parse) { 1379 const char *text = "<!DOCTYPE doc [\n" 1380 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1381 "]>\n" 1382 "<doc>&en;</doc>"; 1383 const ExtFaults faults[] 1384 = {{"<", "Incomplete element declaration not faulted", NULL, 1385 XML_ERROR_UNCLOSED_TOKEN}, 1386 {"<\xe2\x82", /* First two bytes of a three-byte char */ 1387 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 1388 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL, 1389 
XML_ERROR_PARTIAL_CHAR}, 1390 {NULL, NULL, NULL, XML_ERROR_NONE}}; 1391 const ExtFaults *fault = faults; 1392 1393 for (; fault->parse_text != NULL; fault++) { 1394 set_subtest("\"%s\"", fault->parse_text); 1395 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1396 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1397 XML_SetUserData(g_parser, (void *)fault); 1398 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1399 "Parser did not report external entity error"); 1400 XML_ParserReset(g_parser, NULL); 1401 } 1402 } 1403 END_TEST 1404 1405 /* Regression test for SF bug #483514. */ 1406 START_TEST(test_dtd_default_handling) { 1407 const char *text = "<!DOCTYPE doc [\n" 1408 "<!ENTITY e SYSTEM 'http://example.org/e'>\n" 1409 "<!NOTATION n SYSTEM 'http://example.org/n'>\n" 1410 "<!ELEMENT doc EMPTY>\n" 1411 "<!ATTLIST doc a CDATA #IMPLIED>\n" 1412 "<?pi in dtd?>\n" 1413 "<!--comment in dtd-->\n" 1414 "]><doc/>"; 1415 1416 XML_SetDefaultHandler(g_parser, accumulate_characters); 1417 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 1418 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 1419 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 1420 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 1421 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 1422 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 1423 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); 1424 XML_SetCommentHandler(g_parser, dummy_comment_handler); 1425 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1426 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1427 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>")); 1428 } 1429 END_TEST 1430 1431 /* Test handling of attribute declarations */ 1432 START_TEST(test_dtd_attr_handling) { 1433 const char *prolog = "<!DOCTYPE doc [\n" 1434 "<!ELEMENT doc EMPTY>\n"; 
1435 AttTest attr_data[] 1436 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n" 1437 "]>" 1438 "<doc a='two'/>", 1439 XCS("doc"), XCS("a"), 1440 XCS("(one|two|three)"), /* Extraneous spaces will be removed */ 1441 NULL, XML_TRUE}, 1442 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n" 1443 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n" 1444 "]>" 1445 "<doc/>", 1446 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE}, 1447 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n" 1448 "]>" 1449 "<doc/>", 1450 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE}, 1451 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n" 1452 "]>" 1453 "<doc/>", 1454 XCS("doc"), XCS("a"), XCS("CDATA"), 1455 #ifdef XML_UNICODE 1456 XCS("\x06f2"), 1457 #else 1458 XCS("\xdb\xb2"), 1459 #endif 1460 XML_FALSE}, 1461 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}}; 1462 AttTest *test; 1463 1464 for (test = attr_data; test->definition != NULL; test++) { 1465 set_subtest("%s", test->definition); 1466 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); 1467 XML_SetUserData(g_parser, test); 1468 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), 1469 XML_FALSE) 1470 == XML_STATUS_ERROR) 1471 xml_failure(g_parser); 1472 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition, 1473 (int)strlen(test->definition), XML_TRUE) 1474 == XML_STATUS_ERROR) 1475 xml_failure(g_parser); 1476 XML_ParserReset(g_parser, NULL); 1477 } 1478 } 1479 END_TEST 1480 1481 /* See related SF bug #673791. 1482 When namespace processing is enabled, setting the namespace URI for 1483 a prefix is not allowed; this test ensures that it *is* allowed 1484 when namespace processing is not enabled. 1485 (See Namespaces in XML, section 2.) 
1486 */ 1487 START_TEST(test_empty_ns_without_namespaces) { 1488 const char *text = "<doc xmlns:prefix='http://example.org/'>\n" 1489 " <e xmlns:prefix=''/>\n" 1490 "</doc>"; 1491 1492 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1493 == XML_STATUS_ERROR) 1494 xml_failure(g_parser); 1495 } 1496 END_TEST 1497 1498 /* Regression test for SF bug #824420. 1499 Checks that an xmlns:prefix attribute set in an attribute's default 1500 value isn't misinterpreted. 1501 */ 1502 START_TEST(test_ns_in_attribute_default_without_namespaces) { 1503 const char *text = "<!DOCTYPE e:element [\n" 1504 " <!ATTLIST e:element\n" 1505 " xmlns:e CDATA 'http://example.org/'>\n" 1506 " ]>\n" 1507 "<e:element/>"; 1508 1509 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1510 == XML_STATUS_ERROR) 1511 xml_failure(g_parser); 1512 } 1513 END_TEST 1514 1515 /* Regression test for SF bug #1515266: missing check of stopped 1516 parser in doContext() 'for' loop. */ 1517 START_TEST(test_stop_parser_between_char_data_calls) { 1518 /* The sample data must be big enough that there are two calls to 1519 the character data handler from within the inner "for" loop of 1520 the XML_TOK_DATA_CHARS case in doContent(), and the character 1521 handler must stop the parser and clear the character data 1522 handler. 1523 */ 1524 const char *text = long_character_data_text; 1525 1526 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1527 g_resumable = XML_FALSE; 1528 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1529 != XML_STATUS_ERROR) 1530 xml_failure(g_parser); 1531 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 1532 xml_failure(g_parser); 1533 } 1534 END_TEST 1535 1536 /* Regression test for SF bug #1515266: missing check of stopped 1537 parser in doContext() 'for' loop. 
*/ 1538 START_TEST(test_suspend_parser_between_char_data_calls) { 1539 /* The sample data must be big enough that there are two calls to 1540 the character data handler from within the inner "for" loop of 1541 the XML_TOK_DATA_CHARS case in doContent(), and the character 1542 handler must stop the parser and clear the character data 1543 handler. 1544 */ 1545 const char *text = long_character_data_text; 1546 1547 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1548 g_resumable = XML_TRUE; 1549 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1550 // we won't know exactly how much input we actually managed to give Expat. 1551 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 1552 != XML_STATUS_SUSPENDED) 1553 xml_failure(g_parser); 1554 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1555 xml_failure(g_parser); 1556 /* Try parsing directly */ 1557 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1558 != XML_STATUS_ERROR) 1559 fail("Attempt to continue parse while suspended not faulted"); 1560 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 1561 fail("Suspended parse not faulted with correct error"); 1562 } 1563 END_TEST 1564 1565 /* Test repeated calls to XML_StopParser are handled correctly */ 1566 START_TEST(test_repeated_stop_parser_between_char_data_calls) { 1567 const char *text = long_character_data_text; 1568 1569 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1570 g_resumable = XML_FALSE; 1571 g_abortable = XML_FALSE; 1572 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1573 != XML_STATUS_ERROR) 1574 fail("Failed to double-stop parser"); 1575 1576 XML_ParserReset(g_parser, NULL); 1577 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1578 g_resumable = XML_TRUE; 1579 g_abortable = XML_FALSE; 1580 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1581 // we won't know 
exactly how much input we actually managed to give Expat. 1582 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 1583 != XML_STATUS_SUSPENDED) 1584 fail("Failed to double-suspend parser"); 1585 1586 XML_ParserReset(g_parser, NULL); 1587 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1588 g_resumable = XML_TRUE; 1589 g_abortable = XML_TRUE; 1590 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1591 != XML_STATUS_ERROR) 1592 fail("Failed to suspend-abort parser"); 1593 } 1594 END_TEST 1595 1596 START_TEST(test_good_cdata_ascii) { 1597 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>"; 1598 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>"); 1599 1600 CharData storage; 1601 CharData_Init(&storage); 1602 XML_SetUserData(g_parser, &storage); 1603 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1604 /* Add start and end handlers for coverage */ 1605 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1606 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1607 1608 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1609 == XML_STATUS_ERROR) 1610 xml_failure(g_parser); 1611 CharData_CheckXMLChars(&storage, expected); 1612 1613 /* Try again, this time with a default handler */ 1614 XML_ParserReset(g_parser, NULL); 1615 CharData_Init(&storage); 1616 XML_SetUserData(g_parser, &storage); 1617 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1618 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1619 1620 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1621 == XML_STATUS_ERROR) 1622 xml_failure(g_parser); 1623 CharData_CheckXMLChars(&storage, expected); 1624 } 1625 END_TEST 1626 1627 START_TEST(test_good_cdata_utf16) { 1628 /* Test data is: 1629 * <?xml version='1.0' encoding='utf-16'?> 1630 * <a><![CDATA[hello]]></a> 1631 */ 1632 const char text[] 1633 = 
"\0<\0?\0x\0m\0l\0" 1634 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1635 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1636 "1\0" 1637 "6\0'" 1638 "\0?\0>\0\n" 1639 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; 1640 const XML_Char *expected = XCS("hello"); 1641 1642 CharData storage; 1643 CharData_Init(&storage); 1644 XML_SetUserData(g_parser, &storage); 1645 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1646 1647 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1648 == XML_STATUS_ERROR) 1649 xml_failure(g_parser); 1650 CharData_CheckXMLChars(&storage, expected); 1651 } 1652 END_TEST 1653 1654 START_TEST(test_good_cdata_utf16_le) { 1655 /* Test data is: 1656 * <?xml version='1.0' encoding='utf-16'?> 1657 * <a><![CDATA[hello]]></a> 1658 */ 1659 const char text[] 1660 = "<\0?\0x\0m\0l\0" 1661 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1662 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1663 "1\0" 1664 "6\0'" 1665 "\0?\0>\0\n" 1666 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; 1667 const XML_Char *expected = XCS("hello"); 1668 1669 CharData storage; 1670 CharData_Init(&storage); 1671 XML_SetUserData(g_parser, &storage); 1672 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1673 1674 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1675 == XML_STATUS_ERROR) 1676 xml_failure(g_parser); 1677 CharData_CheckXMLChars(&storage, expected); 1678 } 1679 END_TEST 1680 1681 /* Test UTF16 conversion of a long cdata string */ 1682 1683 /* 16 characters: handy macro to reduce visual clutter */ 1684 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" 1685 1686 START_TEST(test_long_cdata_utf16) { 1687 /* Test data is: 1688 * <?xlm version='1.0' encoding='utf-16'?> 1689 * <a><![CDATA[ 1690 * ABCDEFGHIJKLMNOP 1691 * ]]></a> 1692 */ 1693 const char text[] 1694 = "\0<\0?\0x\0m\0l\0 " 1695 
"\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " 1696 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" 1697 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1698 /* 64 characters per line */ 1699 /* clang-format off */ 1700 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1701 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1702 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1703 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1704 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1705 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1706 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1707 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1708 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1709 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1710 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1711 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1712 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1713 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1714 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1715 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1716 A_TO_P_IN_UTF16 1717 /* clang-format on */ 1718 "\0]\0]\0>\0<\0/\0a\0>"; 1719 const XML_Char *expected = 1720 /* clang-format off */ 1721 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1722 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1723 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1724 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1725 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1726 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1727 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1728 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1729 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1730 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1731 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1732 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1733 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1734 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1735 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1736 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1737 XCS("ABCDEFGHIJKLMNOP"); 1738 /* clang-format on */ 1739 CharData storage; 1740 void *buffer; 1741 1742 CharData_Init(&storage); 1743 XML_SetUserData(g_parser, &storage); 1744 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1745 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1); 1746 if (buffer == NULL) 1747 fail("Could not allocate parse buffer"); 1748 assert(buffer != NULL); 1749 memcpy(buffer, text, sizeof(text) - 1); 1750 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) 1751 xml_failure(g_parser); 1752 CharData_CheckXMLChars(&storage, expected); 1753 } 1754 END_TEST 1755 1756 /* Test handling of multiple unit UTF-16 characters */ 1757 START_TEST(test_multichar_cdata_utf16) { 1758 /* Test data is: 1759 * <?xml version='1.0' encoding='utf-16'?> 1760 * <a><![CDATA[{MINIM}{CROTCHET}]]></a> 1761 * 1762 * where {MINIM} is U+1d15e (a minim or half-note) 1763 * UTF-16: 0xd834 0xdd5e 1764 * UTF-8: 0xf0 0x9d 0x85 0x9e 1765 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) 1766 * UTF-16: 0xd834 0xdd5f 1767 * UTF-8: 0xf0 0x9d 0x85 0x9f 1768 */ 1769 const char text[] = "\0<\0?\0x\0m\0l\0" 1770 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1771 " 
\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1772 "1\0" 1773 "6\0'" 1774 "\0?\0>\0\n" 1775 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1776 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" 1777 "\0]\0]\0>\0<\0/\0a\0>"; 1778 #ifdef XML_UNICODE 1779 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); 1780 #else 1781 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); 1782 #endif 1783 CharData storage; 1784 1785 CharData_Init(&storage); 1786 XML_SetUserData(g_parser, &storage); 1787 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1788 1789 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1790 == XML_STATUS_ERROR) 1791 xml_failure(g_parser); 1792 CharData_CheckXMLChars(&storage, expected); 1793 } 1794 END_TEST 1795 1796 /* Test that an element name with a UTF-16 surrogate pair is rejected */ 1797 START_TEST(test_utf16_bad_surrogate_pair) { 1798 /* Test data is: 1799 * <?xml version='1.0' encoding='utf-16'?> 1800 * <a><![CDATA[{BADLINB}]]></a> 1801 * 1802 * where {BADLINB} is U+10000 (the first Linear B character) 1803 * with the UTF-16 surrogate pair in the wrong order, i.e. 
1804 * 0xdc00 0xd800 1805 */ 1806 const char text[] = "\0<\0?\0x\0m\0l\0" 1807 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1808 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1809 "1\0" 1810 "6\0'" 1811 "\0?\0>\0\n" 1812 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1813 "\xdc\x00\xd8\x00" 1814 "\0]\0]\0>\0<\0/\0a\0>"; 1815 1816 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1817 != XML_STATUS_ERROR) 1818 fail("Reversed UTF-16 surrogate pair not faulted"); 1819 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 1820 xml_failure(g_parser); 1821 } 1822 END_TEST 1823 1824 START_TEST(test_bad_cdata) { 1825 struct CaseData { 1826 const char *text; 1827 enum XML_Error expectedError; 1828 }; 1829 1830 struct CaseData cases[] 1831 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN}, 1832 {"<a><!", XML_ERROR_UNCLOSED_TOKEN}, 1833 {"<a><![", XML_ERROR_UNCLOSED_TOKEN}, 1834 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN}, 1835 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN}, 1836 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN}, 1837 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN}, 1838 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN}, 1839 1840 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1841 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1842 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1843 1844 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN}, 1845 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1846 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! 
*/ 1847 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN}, 1848 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN}, 1849 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN}, 1850 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN}, 1851 1852 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1853 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1854 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1855 1856 size_t i = 0; 1857 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1858 set_subtest("%s", cases[i].text); 1859 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( 1860 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); 1861 const enum XML_Error actualError = XML_GetErrorCode(g_parser); 1862 1863 assert(actualStatus == XML_STATUS_ERROR); 1864 1865 if (actualError != cases[i].expectedError) { 1866 char message[100]; 1867 snprintf(message, sizeof(message), 1868 "Expected error %d but got error %d for case %u: \"%s\"\n", 1869 cases[i].expectedError, actualError, (unsigned int)i + 1, 1870 cases[i].text); 1871 fail(message); 1872 } 1873 1874 XML_ParserReset(g_parser, NULL); 1875 } 1876 } 1877 END_TEST 1878 1879 /* Test failures in UTF-16 CDATA */ 1880 START_TEST(test_bad_cdata_utf16) { 1881 struct CaseData { 1882 size_t text_bytes; 1883 const char *text; 1884 enum XML_Error expected_error; 1885 }; 1886 1887 const char prolog[] = "\0<\0?\0x\0m\0l\0" 1888 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1889 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1890 "1\0" 1891 "6\0'" 1892 "\0?\0>\0\n" 1893 "\0<\0a\0>"; 1894 struct CaseData cases[] = { 1895 {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, 1896 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, 1897 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, 1898 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, 1899 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, 1900 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, 1901 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, 1902 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, 1903 {9, "\0<\0!\0[\0C\0", 
XML_ERROR_UNCLOSED_TOKEN}, 1904 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, 1905 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, 1906 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, 1907 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1908 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, 1909 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, 1910 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, 1911 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1912 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1913 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1914 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1915 /* Now add a four-byte UTF-16 character */ 1916 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", 1917 XML_ERROR_UNCLOSED_CDATA_SECTION}, 1918 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR}, 1919 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", 1920 XML_ERROR_PARTIAL_CHAR}, 1921 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", 1922 XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1923 size_t i; 1924 1925 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1926 set_subtest("case %lu", (long unsigned)(i + 1)); 1927 enum XML_Status actual_status; 1928 enum XML_Error actual_error; 1929 1930 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1, 1931 XML_FALSE) 1932 == XML_STATUS_ERROR) 1933 xml_failure(g_parser); 1934 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text, 1935 (int)cases[i].text_bytes, XML_TRUE); 1936 assert(actual_status == XML_STATUS_ERROR); 1937 actual_error = XML_GetErrorCode(g_parser); 1938 if (actual_error != cases[i].expected_error) { 1939 char message[1024]; 1940 1941 snprintf(message, sizeof(message), 1942 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR 1943 ") for case %lu\n", 1944 cases[i].expected_error, 1945 
XML_ErrorString(cases[i].expected_error), actual_error, 1946 XML_ErrorString(actual_error), (long unsigned)(i + 1)); 1947 fail(message); 1948 } 1949 XML_ParserReset(g_parser, NULL); 1950 } 1951 } 1952 END_TEST 1953 1954 /* Test stopping the parser in cdata handler */ 1955 START_TEST(test_stop_parser_between_cdata_calls) { 1956 const char *text = long_cdata_text; 1957 1958 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1959 g_resumable = XML_FALSE; 1960 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler"); 1961 } 1962 END_TEST 1963 1964 /* Test suspending the parser in cdata handler */ 1965 START_TEST(test_suspend_parser_between_cdata_calls) { 1966 if (g_chunkSize != 0) { 1967 // this test does not use SINGLE_BYTES, because of suspension 1968 return; 1969 } 1970 1971 const char *text = long_cdata_text; 1972 enum XML_Status result; 1973 1974 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1975 g_resumable = XML_TRUE; 1976 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1977 // we won't know exactly how much input we actually managed to give Expat. 1978 result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE); 1979 if (result != XML_STATUS_SUSPENDED) { 1980 if (result == XML_STATUS_ERROR) 1981 xml_failure(g_parser); 1982 fail("Parse not suspended in CDATA handler"); 1983 } 1984 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1985 xml_failure(g_parser); 1986 } 1987 END_TEST 1988 1989 /* Test memory allocation functions */ 1990 START_TEST(test_memory_allocation) { 1991 char *buffer = (char *)XML_MemMalloc(g_parser, 256); 1992 char *p; 1993 1994 if (buffer == NULL) { 1995 fail("Allocation failed"); 1996 } else { 1997 /* Try writing to memory; some OSes try to cheat! 
*/ 1998 buffer[0] = 'T'; 1999 buffer[1] = 'E'; 2000 buffer[2] = 'S'; 2001 buffer[3] = 'T'; 2002 buffer[4] = '\0'; 2003 if (strcmp(buffer, "TEST") != 0) { 2004 fail("Memory not writable"); 2005 } else { 2006 p = (char *)XML_MemRealloc(g_parser, buffer, 512); 2007 if (p == NULL) { 2008 fail("Reallocation failed"); 2009 } else { 2010 /* Write again, just to be sure */ 2011 buffer = p; 2012 buffer[0] = 'V'; 2013 if (strcmp(buffer, "VEST") != 0) { 2014 fail("Reallocated memory not writable"); 2015 } 2016 } 2017 } 2018 XML_MemFree(g_parser, buffer); 2019 } 2020 } 2021 END_TEST 2022 2023 /* Test XML_DefaultCurrent() passes handling on correctly */ 2024 START_TEST(test_default_current) { 2025 const char *text = "<doc>hell]</doc>"; 2026 const char *entity_text = "<!DOCTYPE doc [\n" 2027 "<!ENTITY entity '%'>\n" 2028 "]>\n" 2029 "<doc>&entity;</doc>"; 2030 2031 set_subtest("with defaulting"); 2032 { 2033 struct handler_record_list storage; 2034 storage.count = 0; 2035 XML_SetDefaultHandler(g_parser, record_default_handler); 2036 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2037 XML_SetUserData(g_parser, &storage); 2038 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2039 == XML_STATUS_ERROR) 2040 xml_failure(g_parser); 2041 int i = 0; 2042 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 2043 // we should have gotten one or more cdata callbacks, totaling 5 chars 2044 int cdata_len_remaining = 5; 2045 while (cdata_len_remaining > 0) { 2046 const struct handler_record_entry *c_entry 2047 = handler_record_get(&storage, i++); 2048 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0); 2049 assert_true(c_entry->arg > 0); 2050 assert_true(c_entry->arg <= cdata_len_remaining); 2051 cdata_len_remaining -= c_entry->arg; 2052 // default handler must follow, with the exact same len argument. 
2053 assert_record_handler_called(&storage, i++, "record_default_handler", 2054 c_entry->arg); 2055 } 2056 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 2057 assert_true(storage.count == i); 2058 } 2059 2060 /* Again, without the defaulting */ 2061 set_subtest("no defaulting"); 2062 { 2063 struct handler_record_list storage; 2064 storage.count = 0; 2065 XML_ParserReset(g_parser, NULL); 2066 XML_SetDefaultHandler(g_parser, record_default_handler); 2067 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2068 XML_SetUserData(g_parser, &storage); 2069 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2070 == XML_STATUS_ERROR) 2071 xml_failure(g_parser); 2072 int i = 0; 2073 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 2074 // we should have gotten one or more cdata callbacks, totaling 5 chars 2075 int cdata_len_remaining = 5; 2076 while (cdata_len_remaining > 0) { 2077 const struct handler_record_entry *c_entry 2078 = handler_record_get(&storage, i++); 2079 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); 2080 assert_true(c_entry->arg > 0); 2081 assert_true(c_entry->arg <= cdata_len_remaining); 2082 cdata_len_remaining -= c_entry->arg; 2083 } 2084 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 2085 assert_true(storage.count == i); 2086 } 2087 2088 /* Now with an internal entity to complicate matters */ 2089 set_subtest("with internal entity"); 2090 { 2091 struct handler_record_list storage; 2092 storage.count = 0; 2093 XML_ParserReset(g_parser, NULL); 2094 XML_SetDefaultHandler(g_parser, record_default_handler); 2095 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2096 XML_SetUserData(g_parser, &storage); 2097 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2098 XML_TRUE) 2099 == XML_STATUS_ERROR) 2100 xml_failure(g_parser); 2101 /* The default handler suppresses the entity */ 
2102 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2103 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2104 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2105 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2106 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2107 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2108 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2109 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2110 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2111 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2112 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2113 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2114 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2115 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2116 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2117 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2118 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2119 assert_record_handler_called(&storage, 17, "record_default_handler", 8); 2120 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2121 assert_true(storage.count == 19); 2122 } 2123 2124 /* Again, with a skip handler */ 2125 set_subtest("with skip handler"); 2126 { 2127 struct handler_record_list storage; 2128 storage.count = 0; 2129 XML_ParserReset(g_parser, NULL); 2130 XML_SetDefaultHandler(g_parser, record_default_handler); 2131 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2132 XML_SetSkippedEntityHandler(g_parser, record_skip_handler); 2133 XML_SetUserData(g_parser, &storage); 2134 if (_XML_Parse_SINGLE_BYTES(g_parser, 
entity_text, (int)strlen(entity_text), 2135 XML_TRUE) 2136 == XML_STATUS_ERROR) 2137 xml_failure(g_parser); 2138 /* The default handler suppresses the entity */ 2139 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2140 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2141 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2142 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2143 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2144 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2145 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2146 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2147 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2148 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2149 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2150 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2151 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2152 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2153 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2154 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2155 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2156 assert_record_handler_called(&storage, 17, "record_skip_handler", 0); 2157 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2158 assert_true(storage.count == 19); 2159 } 2160 2161 /* This time, allow the entity through */ 2162 set_subtest("allow entity"); 2163 { 2164 struct handler_record_list storage; 2165 storage.count = 0; 2166 XML_ParserReset(g_parser, NULL); 2167 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2168 XML_SetCharacterDataHandler(g_parser, 
record_cdata_handler); 2169 XML_SetUserData(g_parser, &storage); 2170 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2171 XML_TRUE) 2172 == XML_STATUS_ERROR) 2173 xml_failure(g_parser); 2174 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2175 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2176 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2177 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2178 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2179 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2180 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2181 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2182 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2183 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2184 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2185 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2186 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2187 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2188 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2189 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2190 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2191 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); 2192 assert_record_handler_called(&storage, 18, "record_default_handler", 1); 2193 assert_record_handler_called(&storage, 19, "record_default_handler", 6); 2194 assert_true(storage.count == 20); 2195 } 2196 2197 /* Finally, without passing the cdata to the default handler */ 2198 set_subtest("not passing cdata"); 2199 { 2200 struct handler_record_list storage; 2201 storage.count = 0; 2202 
XML_ParserReset(g_parser, NULL); 2203 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2204 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2205 XML_SetUserData(g_parser, &storage); 2206 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2207 XML_TRUE) 2208 == XML_STATUS_ERROR) 2209 xml_failure(g_parser); 2210 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2211 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2212 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2213 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2214 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2215 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2216 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2217 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2218 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2219 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2220 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2221 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2222 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2223 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2224 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2225 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2226 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2227 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", 2228 1); 2229 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2230 assert_true(storage.count == 19); 2231 } 2232 } 2233 END_TEST 2234 2235 /* Test DTD element parsing code paths */ 2236 
START_TEST(test_dtd_elements) { 2237 const char *text = "<!DOCTYPE doc [\n" 2238 "<!ELEMENT doc (chapter)>\n" 2239 "<!ELEMENT chapter (#PCDATA)>\n" 2240 "]>\n" 2241 "<doc><chapter>Wombats are go</chapter></doc>"; 2242 2243 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 2244 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2245 == XML_STATUS_ERROR) 2246 xml_failure(g_parser); 2247 } 2248 END_TEST 2249 2250 static void XMLCALL 2251 element_decl_check_model(void *userData, const XML_Char *name, 2252 XML_Content *model) { 2253 UNUSED_P(userData); 2254 uint32_t errorFlags = 0; 2255 2256 /* Expected model array structure is this: 2257 * [0] (type 6, quant 0) 2258 * [1] (type 5, quant 0) 2259 * [3] (type 4, quant 0, name "bar") 2260 * [4] (type 4, quant 0, name "foo") 2261 * [5] (type 4, quant 3, name "xyz") 2262 * [2] (type 4, quant 2, name "zebra") 2263 */ 2264 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); 2265 errorFlags |= ((model != NULL) ? 0 : (1u << 1)); 2266 2267 if (model != NULL) { 2268 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); 2269 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); 2270 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); 2271 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); 2272 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); 2273 2274 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); 2275 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); 2276 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); 2277 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); 2278 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); 2279 2280 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); 2281 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); 2282 errorFlags |= ((model[2].numchildren == 0) ? 
0 : (1u << 14)); 2283 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); 2284 errorFlags 2285 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); 2286 2287 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); 2288 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); 2289 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); 2290 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); 2291 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); 2292 2293 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); 2294 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); 2295 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); 2296 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); 2297 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); 2298 2299 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); 2300 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); 2301 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); 2302 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); 2303 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 
0 : (1u << 31)); 2304 } 2305 2306 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); 2307 XML_FreeContentModel(g_parser, model); 2308 } 2309 2310 START_TEST(test_dtd_elements_nesting) { 2311 // Payload inspired by a test in Perl's XML::Parser 2312 const char *text = "<!DOCTYPE foo [\n" 2313 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n" 2314 "]>\n" 2315 "<foo/>"; 2316 2317 XML_SetUserData(g_parser, (void *)(uintptr_t)-1); 2318 2319 XML_SetElementDeclHandler(g_parser, element_decl_check_model); 2320 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2321 == XML_STATUS_ERROR) 2322 xml_failure(g_parser); 2323 2324 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) 2325 fail("Element declaration model regression detected"); 2326 } 2327 END_TEST 2328 2329 /* Test foreign DTD handling */ 2330 START_TEST(test_set_foreign_dtd) { 2331 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"; 2332 const char *text2 = "<doc>&entity;</doc>"; 2333 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2334 2335 /* Check hash salt is passed through too */ 2336 XML_SetHashSalt(g_parser, 0x12345678); 2337 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2338 XML_SetUserData(g_parser, &test_data); 2339 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2340 /* Add a default handler to exercise more code paths */ 2341 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2342 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2343 fail("Could not set foreign DTD"); 2344 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2345 == XML_STATUS_ERROR) 2346 xml_failure(g_parser); 2347 2348 /* Ensure that trying to set the DTD after parsing has started 2349 * is faulted, even if it's the same setting. 
2350 */ 2351 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2352 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2353 fail("Failed to reject late foreign DTD setting"); 2354 /* Ditto for the hash salt */ 2355 if (XML_SetHashSalt(g_parser, 0x23456789)) 2356 fail("Failed to reject late hash salt change"); 2357 2358 /* Now finish the parse */ 2359 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2360 == XML_STATUS_ERROR) 2361 xml_failure(g_parser); 2362 } 2363 END_TEST 2364 2365 /* Test foreign DTD handling with a failing NotStandalone handler */ 2366 START_TEST(test_foreign_dtd_not_standalone) { 2367 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2368 "<doc>&entity;</doc>"; 2369 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2370 2371 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2372 XML_SetUserData(g_parser, &test_data); 2373 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2374 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 2375 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2376 fail("Could not set foreign DTD"); 2377 expect_failure(text, XML_ERROR_NOT_STANDALONE, 2378 "NotStandalonehandler failed to reject"); 2379 } 2380 END_TEST 2381 2382 /* Test invalid character in a foreign DTD is faulted */ 2383 START_TEST(test_invalid_foreign_dtd) { 2384 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2385 "<doc>&entity;</doc>"; 2386 ExtFaults test_data 2387 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN}; 2388 2389 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2390 XML_SetUserData(g_parser, &test_data); 2391 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 2392 XML_UseForeignDTD(g_parser, XML_TRUE); 2393 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2394 "Bad DTD should not have been accepted"); 2395 } 2396 END_TEST 2397 2398 /* Test foreign DTD use with a 
doctype */ 2399 START_TEST(test_foreign_dtd_with_doctype) { 2400 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n" 2401 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n"; 2402 const char *text2 = "<doc>&entity;</doc>"; 2403 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2404 2405 /* Check hash salt is passed through too */ 2406 XML_SetHashSalt(g_parser, 0x12345678); 2407 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2408 XML_SetUserData(g_parser, &test_data); 2409 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2410 /* Add a default handler to exercise more code paths */ 2411 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2412 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2413 fail("Could not set foreign DTD"); 2414 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2415 == XML_STATUS_ERROR) 2416 xml_failure(g_parser); 2417 2418 /* Ensure that trying to set the DTD after parsing has started 2419 * is faulted, even if it's the same setting. 
2420 */ 2421 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2422 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2423 fail("Failed to reject late foreign DTD setting"); 2424 /* Ditto for the hash salt */ 2425 if (XML_SetHashSalt(g_parser, 0x23456789)) 2426 fail("Failed to reject late hash salt change"); 2427 2428 /* Now finish the parse */ 2429 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2430 == XML_STATUS_ERROR) 2431 xml_failure(g_parser); 2432 } 2433 END_TEST 2434 2435 /* Test XML_UseForeignDTD with no external subset present */ 2436 START_TEST(test_foreign_dtd_without_external_subset) { 2437 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n" 2438 "<doc>&foo;</doc>"; 2439 2440 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2441 XML_SetUserData(g_parser, NULL); 2442 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2443 XML_UseForeignDTD(g_parser, XML_TRUE); 2444 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2445 == XML_STATUS_ERROR) 2446 xml_failure(g_parser); 2447 } 2448 END_TEST 2449 2450 START_TEST(test_empty_foreign_dtd) { 2451 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2452 "<doc>&entity;</doc>"; 2453 2454 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2455 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2456 XML_UseForeignDTD(g_parser, XML_TRUE); 2457 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 2458 "Undefined entity not faulted"); 2459 } 2460 END_TEST 2461 2462 /* Test XML Base is set and unset appropriately */ 2463 START_TEST(test_set_base) { 2464 const XML_Char *old_base; 2465 const XML_Char *new_base = XCS("/local/file/name.xml"); 2466 2467 old_base = XML_GetBase(g_parser); 2468 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK) 2469 fail("Unable to set base"); 2470 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0) 2471 fail("Base setting not correct"); 2472 if 
(XML_SetBase(g_parser, NULL) != XML_STATUS_OK) 2473 fail("Unable to NULL base"); 2474 if (XML_GetBase(g_parser) != NULL) 2475 fail("Base setting not nulled"); 2476 XML_SetBase(g_parser, old_base); 2477 } 2478 END_TEST 2479 2480 /* Test attribute counts, indexing, etc */ 2481 START_TEST(test_attributes) { 2482 const char *text = "<!DOCTYPE doc [\n" 2483 "<!ELEMENT doc (tag)>\n" 2484 "<!ATTLIST doc id ID #REQUIRED>\n" 2485 "]>" 2486 "<doc a='1' id='one' b='2'>" 2487 "<tag c='3'/>" 2488 "</doc>"; 2489 AttrInfo doc_info[] = {{XCS("a"), XCS("1")}, 2490 {XCS("b"), XCS("2")}, 2491 {XCS("id"), XCS("one")}, 2492 {NULL, NULL}}; 2493 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}}; 2494 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL}, 2495 {XCS("tag"), 1, NULL, NULL}, 2496 {NULL, 0, NULL, NULL}}; 2497 info[0].attributes = doc_info; 2498 info[1].attributes = tag_info; 2499 2500 XML_Parser parser = XML_ParserCreate(NULL); 2501 assert_true(parser != NULL); 2502 ParserAndElementInfo parserAndElementInfos = { 2503 parser, 2504 info, 2505 }; 2506 2507 XML_SetStartElementHandler(parser, counting_start_element_handler); 2508 XML_SetUserData(parser, &parserAndElementInfos); 2509 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2510 == XML_STATUS_ERROR) 2511 xml_failure(parser); 2512 2513 XML_ParserFree(parser); 2514 } 2515 END_TEST 2516 2517 /* Test reset works correctly in the middle of processing an internal 2518 * entity. Exercises some obscure code in XML_ParserReset(). 
2519 */ 2520 START_TEST(test_reset_in_entity) { 2521 if (g_chunkSize != 0) { 2522 // this test does not use SINGLE_BYTES, because of suspension 2523 return; 2524 } 2525 2526 const char *text = "<!DOCTYPE doc [\n" 2527 "<!ENTITY wombat 'wom'>\n" 2528 "<!ENTITY entity 'hi &wom; there'>\n" 2529 "]>\n" 2530 "<doc>&entity;</doc>"; 2531 XML_ParsingStatus status; 2532 2533 g_resumable = XML_TRUE; 2534 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2535 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 2536 // we won't know exactly how much input we actually managed to give Expat. 2537 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2538 == XML_STATUS_ERROR) 2539 xml_failure(g_parser); 2540 XML_GetParsingStatus(g_parser, &status); 2541 if (status.parsing != XML_SUSPENDED) 2542 fail("Parsing status not SUSPENDED"); 2543 XML_ParserReset(g_parser, NULL); 2544 XML_GetParsingStatus(g_parser, &status); 2545 if (status.parsing != XML_INITIALIZED) 2546 fail("Parsing status doesn't reset to INITIALIZED"); 2547 } 2548 END_TEST 2549 2550 /* Test that resume correctly passes through parse errors */ 2551 START_TEST(test_resume_invalid_parse) { 2552 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */ 2553 2554 g_resumable = XML_TRUE; 2555 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2556 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2557 == XML_STATUS_ERROR) 2558 xml_failure(g_parser); 2559 if (XML_ResumeParser(g_parser) == XML_STATUS_OK) 2560 fail("Resumed invalid parse not faulted"); 2561 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN) 2562 fail("Invalid parse not correctly faulted"); 2563 } 2564 END_TEST 2565 2566 /* Test that re-suspended parses are correctly passed through */ 2567 START_TEST(test_resume_resuspended) { 2568 const char *text = "<doc>Hello<meep/>world</doc>"; 2569 2570 g_resumable = XML_TRUE; 2571 
XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2572 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2573 == XML_STATUS_ERROR) 2574 xml_failure(g_parser); 2575 g_resumable = XML_TRUE; 2576 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2577 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 2578 fail("Resumption not suspended"); 2579 /* This one should succeed and finish up */ 2580 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 2581 xml_failure(g_parser); 2582 } 2583 END_TEST 2584 2585 /* Test that CDATA shows up correctly through a default handler */ 2586 START_TEST(test_cdata_default) { 2587 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>"; 2588 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>"); 2589 CharData storage; 2590 2591 CharData_Init(&storage); 2592 XML_SetUserData(g_parser, &storage); 2593 XML_SetDefaultHandler(g_parser, accumulate_characters); 2594 2595 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2596 == XML_STATUS_ERROR) 2597 xml_failure(g_parser); 2598 CharData_CheckXMLChars(&storage, expected); 2599 } 2600 END_TEST 2601 2602 /* Test resetting a subordinate parser does exactly nothing */ 2603 START_TEST(test_subordinate_reset) { 2604 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2605 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2606 "<doc>&entity;</doc>"; 2607 2608 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2609 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter); 2610 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2611 == XML_STATUS_ERROR) 2612 xml_failure(g_parser); 2613 } 2614 END_TEST 2615 2616 /* Test suspending a subordinate parser */ 2617 START_TEST(test_subordinate_suspend) { 2618 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2619 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2620 "<doc>&entity;</doc>"; 2621 2622 
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2623 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender); 2624 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2625 == XML_STATUS_ERROR) 2626 xml_failure(g_parser); 2627 } 2628 END_TEST 2629 2630 /* Test suspending a subordinate parser from an XML declaration */ 2631 /* Increases code coverage of the tests */ 2632 2633 START_TEST(test_subordinate_xdecl_suspend) { 2634 const char *text 2635 = "<!DOCTYPE doc [\n" 2636 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2637 "]>\n" 2638 "<doc>&entity;</doc>"; 2639 2640 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2641 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2642 g_resumable = XML_TRUE; 2643 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2644 == XML_STATUS_ERROR) 2645 xml_failure(g_parser); 2646 } 2647 END_TEST 2648 2649 START_TEST(test_subordinate_xdecl_abort) { 2650 const char *text 2651 = "<!DOCTYPE doc [\n" 2652 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2653 "]>\n" 2654 "<doc>&entity;</doc>"; 2655 2656 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2657 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2658 g_resumable = XML_FALSE; 2659 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2660 == XML_STATUS_ERROR) 2661 xml_failure(g_parser); 2662 } 2663 END_TEST 2664 2665 /* Test external entity fault handling with suspension */ 2666 START_TEST(test_ext_entity_invalid_suspended_parse) { 2667 const char *text = "<!DOCTYPE doc [\n" 2668 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2669 "]>\n" 2670 "<doc>&en;</doc>"; 2671 ExtFaults faults[] 2672 = {{"<?xml version='1.0' encoding='us-ascii'?><", 2673 "Incomplete element declaration not faulted", NULL, 2674 XML_ERROR_UNCLOSED_TOKEN}, 2675 {/* First two bytes of a 
three-byte char */ 2676 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82", 2677 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 2678 {NULL, NULL, NULL, XML_ERROR_NONE}}; 2679 ExtFaults *fault; 2680 2681 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 2682 set_subtest("%s", fault->parse_text); 2683 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2684 XML_SetExternalEntityRefHandler(g_parser, 2685 external_entity_suspending_faulter); 2686 XML_SetUserData(g_parser, fault); 2687 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2688 "Parser did not report external entity error"); 2689 XML_ParserReset(g_parser, NULL); 2690 } 2691 } 2692 END_TEST 2693 2694 /* Test setting an explicit encoding */ 2695 START_TEST(test_explicit_encoding) { 2696 const char *text1 = "<doc>Hello "; 2697 const char *text2 = " World</doc>"; 2698 2699 /* Just check that we can set the encoding to NULL before starting */ 2700 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2701 fail("Failed to initialise encoding to NULL"); 2702 /* Say we are UTF-8 */ 2703 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK) 2704 fail("Failed to set explicit encoding"); 2705 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2706 == XML_STATUS_ERROR) 2707 xml_failure(g_parser); 2708 /* Try to switch encodings mid-parse */ 2709 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR) 2710 fail("Allowed encoding change"); 2711 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2712 == XML_STATUS_ERROR) 2713 xml_failure(g_parser); 2714 /* Try now the parse is over */ 2715 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2716 fail("Failed to unset encoding"); 2717 } 2718 END_TEST 2719 2720 /* Test handling of trailing CR (rather than newline) */ 2721 START_TEST(test_trailing_cr) { 2722 const char *text = "<doc>\r"; 2723 int found_cr; 2724 2725 /* Try with a character handler, 
for code coverage */ 2726 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler); 2727 XML_SetUserData(g_parser, &found_cr); 2728 found_cr = 0; 2729 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2730 == XML_STATUS_OK) 2731 fail("Failed to fault unclosed doc"); 2732 if (found_cr == 0) 2733 fail("Did not catch the carriage return"); 2734 XML_ParserReset(g_parser, NULL); 2735 2736 /* Now with a default handler instead */ 2737 XML_SetDefaultHandler(g_parser, cr_cdata_handler); 2738 XML_SetUserData(g_parser, &found_cr); 2739 found_cr = 0; 2740 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2741 == XML_STATUS_OK) 2742 fail("Failed to fault unclosed doc"); 2743 if (found_cr == 0) 2744 fail("Did not catch default carriage return"); 2745 } 2746 END_TEST 2747 2748 /* Test trailing CR in an external entity parse */ 2749 START_TEST(test_ext_entity_trailing_cr) { 2750 const char *text = "<!DOCTYPE doc [\n" 2751 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2752 "]>\n" 2753 "<doc>&en;</doc>"; 2754 int found_cr; 2755 2756 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2757 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher); 2758 XML_SetUserData(g_parser, &found_cr); 2759 found_cr = 0; 2760 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2761 != XML_STATUS_OK) 2762 xml_failure(g_parser); 2763 if (found_cr == 0) 2764 fail("No carriage return found"); 2765 XML_ParserReset(g_parser, NULL); 2766 2767 /* Try again with a different trailing CR */ 2768 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2769 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher); 2770 XML_SetUserData(g_parser, &found_cr); 2771 found_cr = 0; 2772 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2773 != XML_STATUS_OK) 2774 xml_failure(g_parser); 2775 if (found_cr == 0) 2776 fail("No carriage return found"); 2777 } 
2778 END_TEST 2779 2780 /* Test handling of trailing square bracket */ 2781 START_TEST(test_trailing_rsqb) { 2782 const char *text8 = "<doc>]"; 2783 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; 2784 int found_rsqb; 2785 int text8_len = (int)strlen(text8); 2786 2787 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2788 XML_SetUserData(g_parser, &found_rsqb); 2789 found_rsqb = 0; 2790 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE) 2791 == XML_STATUS_OK) 2792 fail("Failed to fault unclosed doc"); 2793 if (found_rsqb == 0) 2794 fail("Did not catch the right square bracket"); 2795 2796 /* Try again with a different encoding */ 2797 XML_ParserReset(g_parser, NULL); 2798 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2799 XML_SetUserData(g_parser, &found_rsqb); 2800 found_rsqb = 0; 2801 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2802 XML_TRUE) 2803 == XML_STATUS_OK) 2804 fail("Failed to fault unclosed doc"); 2805 if (found_rsqb == 0) 2806 fail("Did not catch the right square bracket"); 2807 2808 /* And finally with a default handler */ 2809 XML_ParserReset(g_parser, NULL); 2810 XML_SetDefaultHandler(g_parser, rsqb_handler); 2811 XML_SetUserData(g_parser, &found_rsqb); 2812 found_rsqb = 0; 2813 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2814 XML_TRUE) 2815 == XML_STATUS_OK) 2816 fail("Failed to fault unclosed doc"); 2817 if (found_rsqb == 0) 2818 fail("Did not catch the right square bracket"); 2819 } 2820 END_TEST 2821 2822 /* Test trailing right square bracket in an external entity parse */ 2823 START_TEST(test_ext_entity_trailing_rsqb) { 2824 const char *text = "<!DOCTYPE doc [\n" 2825 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2826 "]>\n" 2827 "<doc>&en;</doc>"; 2828 int found_rsqb; 2829 2830 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2831 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher); 2832 
XML_SetUserData(g_parser, &found_rsqb); 2833 found_rsqb = 0; 2834 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2835 != XML_STATUS_OK) 2836 xml_failure(g_parser); 2837 if (found_rsqb == 0) 2838 fail("No right square bracket found"); 2839 } 2840 END_TEST 2841 2842 /* Test CDATA handling in an external entity */ 2843 START_TEST(test_ext_entity_good_cdata) { 2844 const char *text = "<!DOCTYPE doc [\n" 2845 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2846 "]>\n" 2847 "<doc>&en;</doc>"; 2848 2849 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2850 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii); 2851 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2852 != XML_STATUS_OK) 2853 xml_failure(g_parser); 2854 } 2855 END_TEST 2856 2857 /* Test user parameter settings */ 2858 START_TEST(test_user_parameters) { 2859 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2860 "<!-- Primary parse -->\n" 2861 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2862 "<doc>&entity;"; 2863 const char *epilog = "<!-- Back to primary parser -->\n" 2864 "</doc>"; 2865 2866 g_comment_count = 0; 2867 g_skip_count = 0; 2868 g_xdecl_count = 0; 2869 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2870 XML_SetXmlDeclHandler(g_parser, xml_decl_handler); 2871 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker); 2872 XML_SetCommentHandler(g_parser, data_check_comment_handler); 2873 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler); 2874 XML_UseParserAsHandlerArg(g_parser); 2875 XML_SetUserData(g_parser, (void *)1); 2876 g_handler_data = g_parser; 2877 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2878 == XML_STATUS_ERROR) 2879 xml_failure(g_parser); 2880 /* Ensure we can't change policy mid-parse */ 2881 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) 2882 fail("Changed param entity 
parsing policy while parsing"); 2883 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) 2884 == XML_STATUS_ERROR) 2885 xml_failure(g_parser); 2886 if (g_comment_count != 3) 2887 fail("Comment handler not invoked enough times"); 2888 if (g_skip_count != 1) 2889 fail("Skip handler not invoked enough times"); 2890 if (g_xdecl_count != 1) 2891 fail("XML declaration handler not invoked"); 2892 } 2893 END_TEST 2894 2895 /* Test that an explicit external entity handler argument replaces 2896 * the parser as the first argument. 2897 * 2898 * We do not call the first parameter to the external entity handler 2899 * 'parser' for once, since the first time the handler is called it 2900 * will actually be a text string. We need to be able to access the 2901 * global 'parser' variable to create our external entity parser from, 2902 * since there are code paths we need to ensure get executed. 2903 */ 2904 START_TEST(test_ext_entity_ref_parameter) { 2905 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2906 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2907 "<doc>&entity;</doc>"; 2908 2909 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2910 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2911 /* Set a handler arg that is not NULL and not parser (which is 2912 * what NULL would cause to be passed. 
2913 */ 2914 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text); 2915 g_handler_data = text; 2916 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2917 == XML_STATUS_ERROR) 2918 xml_failure(g_parser); 2919 2920 /* Now try again with unset args */ 2921 XML_ParserReset(g_parser, NULL); 2922 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2923 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2924 XML_SetExternalEntityRefHandlerArg(g_parser, NULL); 2925 g_handler_data = g_parser; 2926 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2927 == XML_STATUS_ERROR) 2928 xml_failure(g_parser); 2929 } 2930 END_TEST 2931 2932 /* Test the parsing of an empty string */ 2933 START_TEST(test_empty_parse) { 2934 const char *text = "<doc></doc>"; 2935 const char *partial = "<doc>"; 2936 2937 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) 2938 fail("Parsing empty string faulted"); 2939 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2940 fail("Parsing final empty string not faulted"); 2941 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS) 2942 fail("Parsing final empty string faulted for wrong reason"); 2943 2944 /* Now try with valid text before the empty end */ 2945 XML_ParserReset(g_parser, NULL); 2946 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2947 == XML_STATUS_ERROR) 2948 xml_failure(g_parser); 2949 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) 2950 fail("Parsing final empty string faulted"); 2951 2952 /* Now try with invalid text before the empty end */ 2953 XML_ParserReset(g_parser, NULL); 2954 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial), 2955 XML_FALSE) 2956 == XML_STATUS_ERROR) 2957 xml_failure(g_parser); 2958 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2959 fail("Parsing final incomplete empty string not faulted"); 2960 } 2961 END_TEST 
2962 2963 /* Test XML_Parse for len < 0 */ 2964 START_TEST(test_negative_len_parse) { 2965 const char *const doc = "<root/>"; 2966 for (int isFinal = 0; isFinal < 2; isFinal++) { 2967 set_subtest("isFinal=%d", isFinal); 2968 2969 XML_Parser parser = XML_ParserCreate(NULL); 2970 2971 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 2972 fail("There was not supposed to be any initial parse error."); 2973 2974 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal); 2975 2976 if (status != XML_STATUS_ERROR) 2977 fail("Negative len was expected to fail the parse but did not."); 2978 2979 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 2980 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 2981 2982 XML_ParserFree(parser); 2983 } 2984 } 2985 END_TEST 2986 2987 /* Test XML_ParseBuffer for len < 0 */ 2988 START_TEST(test_negative_len_parse_buffer) { 2989 const char *const doc = "<root/>"; 2990 for (int isFinal = 0; isFinal < 2; isFinal++) { 2991 set_subtest("isFinal=%d", isFinal); 2992 2993 XML_Parser parser = XML_ParserCreate(NULL); 2994 2995 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 2996 fail("There was not supposed to be any initial parse error."); 2997 2998 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc)); 2999 3000 if (buffer == NULL) 3001 fail("XML_GetBuffer failed."); 3002 3003 memcpy(buffer, doc, strlen(doc)); 3004 3005 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal); 3006 3007 if (status != XML_STATUS_ERROR) 3008 fail("Negative len was expected to fail the parse but did not."); 3009 3010 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 3011 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 3012 3013 XML_ParserFree(parser); 3014 } 3015 } 3016 END_TEST 3017 3018 /* Test odd corners of the XML_GetBuffer interface */ 3019 static enum XML_Status 3020 get_feature(enum XML_FeatureEnum feature_id, long *presult) { 3021 const XML_Feature *feature = XML_GetFeatureList(); 3022 
3023 if (feature == NULL) 3024 return XML_STATUS_ERROR; 3025 for (; feature->feature != XML_FEATURE_END; feature++) { 3026 if (feature->feature == feature_id) { 3027 *presult = feature->value; 3028 return XML_STATUS_OK; 3029 } 3030 } 3031 return XML_STATUS_ERROR; 3032 } 3033 3034 /* Test odd corners of the XML_GetBuffer interface */ 3035 START_TEST(test_get_buffer_1) { 3036 const char *text = get_buffer_test_text; 3037 void *buffer; 3038 long context_bytes; 3039 3040 /* Attempt to allocate a negative length buffer */ 3041 if (XML_GetBuffer(g_parser, -12) != NULL) 3042 fail("Negative length buffer not failed"); 3043 3044 /* Now get a small buffer and extend it past valid length */ 3045 buffer = XML_GetBuffer(g_parser, 1536); 3046 if (buffer == NULL) 3047 fail("1.5K buffer failed"); 3048 assert(buffer != NULL); 3049 memcpy(buffer, text, strlen(text)); 3050 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 3051 == XML_STATUS_ERROR) 3052 xml_failure(g_parser); 3053 if (XML_GetBuffer(g_parser, INT_MAX) != NULL) 3054 fail("INT_MAX buffer not failed"); 3055 3056 /* Now try extending it a more reasonable but still too large 3057 * amount. The allocator in XML_GetBuffer() doubles the buffer 3058 * size until it exceeds the requested amount or INT_MAX. If it 3059 * exceeds INT_MAX, it rejects the request, so we want a request 3060 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable, 3061 * with an extra byte just to ensure that the request is off any 3062 * boundary. The request will be inflated internally by 3063 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our 3064 * request. 
3065 */ 3066 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK) 3067 context_bytes = 0; 3068 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL) 3069 fail("INT_MAX- buffer not failed"); 3070 3071 /* Now try extending it a carefully crafted amount */ 3072 if (XML_GetBuffer(g_parser, 1000) == NULL) 3073 fail("1000 buffer failed"); 3074 } 3075 END_TEST 3076 3077 /* Test more corners of the XML_GetBuffer interface */ 3078 START_TEST(test_get_buffer_2) { 3079 const char *text = get_buffer_test_text; 3080 void *buffer; 3081 3082 /* Now get a decent buffer */ 3083 buffer = XML_GetBuffer(g_parser, 1536); 3084 if (buffer == NULL) 3085 fail("1.5K buffer failed"); 3086 assert(buffer != NULL); 3087 memcpy(buffer, text, strlen(text)); 3088 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 3089 == XML_STATUS_ERROR) 3090 xml_failure(g_parser); 3091 3092 /* Extend it, to catch a different code path */ 3093 if (XML_GetBuffer(g_parser, 1024) == NULL) 3094 fail("1024 buffer failed"); 3095 } 3096 END_TEST 3097 3098 /* Test for signed integer overflow CVE-2022-23852 */ 3099 #if XML_CONTEXT_BYTES > 0 3100 START_TEST(test_get_buffer_3_overflow) { 3101 XML_Parser parser = XML_ParserCreate(NULL); 3102 assert(parser != NULL); 3103 3104 const char *const text = "\n"; 3105 const int expectedKeepValue = (int)strlen(text); 3106 3107 // After this call, variable "keep" in XML_GetBuffer will 3108 // have value expectedKeepValue 3109 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 3110 XML_FALSE /* isFinal */) 3111 == XML_STATUS_ERROR) 3112 xml_failure(parser); 3113 3114 assert(expectedKeepValue > 0); 3115 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) 3116 fail("enlarging buffer not failed"); 3117 3118 XML_ParserFree(parser); 3119 } 3120 END_TEST 3121 #endif // XML_CONTEXT_BYTES > 0 3122 3123 START_TEST(test_buffer_can_grow_to_max) { 3124 const char *const prefixes[] = { 3125 "", 3126 "<", 3127 "<x a='", 3128 
"<doc><x a='", 3129 "<document><x a='", 3130 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" 3131 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" 3132 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" 3133 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" 3134 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; 3135 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); 3136 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow 3137 #if defined(__MINGW32__) && ! defined(__MINGW64__) 3138 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB 3139 // Can we make a big allocation? 3140 for (int i = 1; i <= 2; i++) { 3141 void *const big = malloc(maxbuf); 3142 if (big != NULL) { 3143 free(big); 3144 break; 3145 } 3146 // The big allocation failed. Let's be a little lenient. 3147 maxbuf = maxbuf / 2; 3148 fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf); 3149 } 3150 #endif 3151 3152 for (int i = 0; i < num_prefixes; ++i) { 3153 set_subtest("\"%s\"", prefixes[i]); 3154 XML_Parser parser = XML_ParserCreate(NULL); 3155 #if XML_GE == 1 3156 assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1) 3157 == XML_TRUE); // i.e. deactivate 3158 #endif 3159 const int prefix_len = (int)strlen(prefixes[i]); 3160 const enum XML_Status s 3161 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); 3162 if (s != XML_STATUS_OK) 3163 xml_failure(parser); 3164 3165 // XML_CONTEXT_BYTES of the prefix may remain in the buffer; 3166 // subtracting the whole prefix is easiest, and close enough. 3167 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); 3168 // The limit should be consistent; no prefix should allow us to 3169 // reach above the max buffer size. 
3170 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); 3171 XML_ParserFree(parser); 3172 } 3173 } 3174 END_TEST 3175 3176 START_TEST(test_getbuffer_allocates_on_zero_len) { 3177 for (int first_len = 1; first_len >= 0; first_len--) { 3178 set_subtest("with len=%d first", first_len); 3179 XML_Parser parser = XML_ParserCreate(NULL); 3180 assert_true(parser != NULL); 3181 assert_true(XML_GetBuffer(parser, first_len) != NULL); 3182 assert_true(XML_GetBuffer(parser, 0) != NULL); 3183 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) 3184 xml_failure(parser); 3185 XML_ParserFree(parser); 3186 } 3187 } 3188 END_TEST 3189 3190 /* Test position information macros */ 3191 START_TEST(test_byte_info_at_end) { 3192 const char *text = "<doc></doc>"; 3193 3194 if (XML_GetCurrentByteIndex(g_parser) != -1 3195 || XML_GetCurrentByteCount(g_parser) != 0) 3196 fail("Byte index/count incorrect at start of parse"); 3197 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3198 == XML_STATUS_ERROR) 3199 xml_failure(g_parser); 3200 /* At end, the count will be zero and the index the end of string */ 3201 if (XML_GetCurrentByteCount(g_parser) != 0) 3202 fail("Terminal byte count incorrect"); 3203 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text)) 3204 fail("Terminal byte index incorrect"); 3205 } 3206 END_TEST 3207 3208 /* Test position information from errors */ 3209 #define PRE_ERROR_STR "<doc></" 3210 #define POST_ERROR_STR "wombat></doc>" 3211 START_TEST(test_byte_info_at_error) { 3212 const char *text = PRE_ERROR_STR POST_ERROR_STR; 3213 3214 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3215 == XML_STATUS_OK) 3216 fail("Syntax error not faulted"); 3217 if (XML_GetCurrentByteCount(g_parser) != 0) 3218 fail("Error byte count incorrect"); 3219 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR)) 3220 fail("Error byte index incorrect"); 3221 } 3222 END_TEST 3223 #undef PRE_ERROR_STR 3224 #undef 
POST_ERROR_STR 3225 3226 /* Test position information in handler */ 3227 #define START_ELEMENT "<e>" 3228 #define CDATA_TEXT "Hello" 3229 #define END_ELEMENT "</e>" 3230 START_TEST(test_byte_info_at_cdata) { 3231 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; 3232 int offset, size; 3233 ByteTestData data; 3234 3235 /* Check initial context is empty */ 3236 if (XML_GetInputContext(g_parser, &offset, &size) != NULL) 3237 fail("Unexpected context at start of parse"); 3238 3239 data.start_element_len = (int)strlen(START_ELEMENT); 3240 data.cdata_len = (int)strlen(CDATA_TEXT); 3241 data.total_string_len = (int)strlen(text); 3242 XML_SetCharacterDataHandler(g_parser, byte_character_handler); 3243 XML_SetUserData(g_parser, &data); 3244 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 3245 xml_failure(g_parser); 3246 } 3247 END_TEST 3248 #undef START_ELEMENT 3249 #undef CDATA_TEXT 3250 #undef END_ELEMENT 3251 3252 /* Test predefined entities are correctly recognised */ 3253 START_TEST(test_predefined_entities) { 3254 const char *text = "<doc><>&"'</doc>"; 3255 const XML_Char *expected = XCS("<doc><>&"'</doc>"); 3256 const XML_Char *result = XCS("<>&\"'"); 3257 CharData storage; 3258 3259 XML_SetDefaultHandler(g_parser, accumulate_characters); 3260 /* run_character_check uses XML_SetCharacterDataHandler(), which 3261 * unfortunately heads off a code path that we need to exercise. 
3262 */ 3263 CharData_Init(&storage); 3264 XML_SetUserData(g_parser, &storage); 3265 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3266 == XML_STATUS_ERROR) 3267 xml_failure(g_parser); 3268 /* The default handler doesn't translate the entities */ 3269 CharData_CheckXMLChars(&storage, expected); 3270 3271 /* Now try again and check the translation */ 3272 XML_ParserReset(g_parser, NULL); 3273 run_character_check(text, result); 3274 } 3275 END_TEST 3276 3277 /* Regression test that an invalid tag in an external parameter 3278 * reference in an external DTD is correctly faulted. 3279 * 3280 * Only a few specific tags are legal in DTDs ignoring comments and 3281 * processing instructions, all of which begin with an exclamation 3282 * mark. "<el/>" is not one of them, so the parser should raise an 3283 * error on encountering it. 3284 */ 3285 START_TEST(test_invalid_tag_in_dtd) { 3286 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3287 "<doc></doc>\n"; 3288 3289 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3290 XML_SetExternalEntityRefHandler(g_parser, external_entity_param); 3291 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3292 "Invalid tag IN DTD external param not rejected"); 3293 } 3294 END_TEST 3295 3296 /* Test entities not quite the predefined ones are not mis-recognised */ 3297 START_TEST(test_not_predefined_entities) { 3298 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>", 3299 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL}; 3300 int i = 0; 3301 3302 while (text[i] != NULL) { 3303 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, 3304 "Undefined entity not rejected"); 3305 XML_ParserReset(g_parser, NULL); 3306 i++; 3307 } 3308 } 3309 END_TEST 3310 3311 /* Test conditional inclusion (IGNORE) */ 3312 START_TEST(test_ignore_section) { 3313 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3314 "<doc><e>&entity;</e></doc>"; 3315 const XML_Char *expected 3316 = 
XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;"); 3317 CharData storage; 3318 3319 CharData_Init(&storage); 3320 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3321 XML_SetUserData(g_parser, &storage); 3322 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore); 3323 XML_SetDefaultHandler(g_parser, accumulate_characters); 3324 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3325 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3326 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3327 XML_SetStartElementHandler(g_parser, dummy_start_element); 3328 XML_SetEndElementHandler(g_parser, dummy_end_element); 3329 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3330 == XML_STATUS_ERROR) 3331 xml_failure(g_parser); 3332 CharData_CheckXMLChars(&storage, expected); 3333 } 3334 END_TEST 3335 3336 START_TEST(test_ignore_section_utf16) { 3337 const char text[] = 3338 /* <!DOCTYPE d SYSTEM 's'> */ 3339 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3340 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" 3341 /* <d><e>&en;</e></d> */ 3342 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; 3343 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3344 CharData storage; 3345 3346 CharData_Init(&storage); 3347 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3348 XML_SetUserData(g_parser, &storage); 3349 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16); 3350 XML_SetDefaultHandler(g_parser, accumulate_characters); 3351 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3352 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3353 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3354 XML_SetStartElementHandler(g_parser, dummy_start_element); 3355 XML_SetEndElementHandler(g_parser, dummy_end_element); 3356 if (_XML_Parse_SINGLE_BYTES(g_parser, text, 
(int)sizeof(text) - 1, XML_TRUE) 3357 == XML_STATUS_ERROR) 3358 xml_failure(g_parser); 3359 CharData_CheckXMLChars(&storage, expected); 3360 } 3361 END_TEST 3362 3363 START_TEST(test_ignore_section_utf16_be) { 3364 const char text[] = 3365 /* <!DOCTYPE d SYSTEM 's'> */ 3366 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3367 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" 3368 /* <d><e>&en;</e></d> */ 3369 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; 3370 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3371 CharData storage; 3372 3373 CharData_Init(&storage); 3374 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3375 XML_SetUserData(g_parser, &storage); 3376 XML_SetExternalEntityRefHandler(g_parser, 3377 external_entity_load_ignore_utf16_be); 3378 XML_SetDefaultHandler(g_parser, accumulate_characters); 3379 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3380 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3381 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3382 XML_SetStartElementHandler(g_parser, dummy_start_element); 3383 XML_SetEndElementHandler(g_parser, dummy_end_element); 3384 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3385 == XML_STATUS_ERROR) 3386 xml_failure(g_parser); 3387 CharData_CheckXMLChars(&storage, expected); 3388 } 3389 END_TEST 3390 3391 /* Test mis-formatted conditional exclusion */ 3392 START_TEST(test_bad_ignore_section) { 3393 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3394 "<doc><e>&entity;</e></doc>"; 3395 ExtFaults faults[] 3396 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL, 3397 XML_ERROR_SYNTAX}, 3398 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL, 3399 XML_ERROR_INVALID_TOKEN}, 3400 {/* FIrst two bytes of a three-byte char */ 3401 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL, 3402 XML_ERROR_PARTIAL_CHAR}, 3403 {NULL, NULL, NULL, 
XML_ERROR_NONE}}; 3404 ExtFaults *fault; 3405 3406 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 3407 set_subtest("%s", fault->parse_text); 3408 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3409 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3410 XML_SetUserData(g_parser, fault); 3411 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3412 "Incomplete IGNORE section not failed"); 3413 XML_ParserReset(g_parser, NULL); 3414 } 3415 } 3416 END_TEST 3417 3418 struct bom_testdata { 3419 const char *external; 3420 int split; 3421 XML_Bool nested_callback_happened; 3422 }; 3423 3424 static int XMLCALL 3425 external_bom_checker(XML_Parser parser, const XML_Char *context, 3426 const XML_Char *base, const XML_Char *systemId, 3427 const XML_Char *publicId) { 3428 const char *text; 3429 UNUSED_P(base); 3430 UNUSED_P(systemId); 3431 UNUSED_P(publicId); 3432 3433 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); 3434 if (ext_parser == NULL) 3435 fail("Could not create external entity parser"); 3436 3437 if (! xcstrcmp(systemId, XCS("004-2.ent"))) { 3438 struct bom_testdata *const testdata 3439 = (struct bom_testdata *)XML_GetUserData(parser); 3440 const char *const external = testdata->external; 3441 const int split = testdata->split; 3442 testdata->nested_callback_happened = XML_TRUE; 3443 3444 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) 3445 != XML_STATUS_OK) { 3446 xml_failure(ext_parser); 3447 } 3448 text = external + split; // the parse below will continue where we left off. 3449 } else if (! 
xcstrcmp(systemId, XCS("004-1.ent"))) { 3450 text = "<!ELEMENT doc EMPTY>\n" 3451 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" 3452 "<!ENTITY % e2 '%e1;'>\n"; 3453 } else { 3454 fail("unknown systemId"); 3455 } 3456 3457 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) 3458 != XML_STATUS_OK) 3459 xml_failure(ext_parser); 3460 3461 XML_ParserFree(ext_parser); 3462 return XML_STATUS_OK; 3463 } 3464 3465 /* regression test: BOM should be consumed when followed by a partial token. */ 3466 START_TEST(test_external_bom_consumed) { 3467 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3468 "<doc></doc>\n"; 3469 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; 3470 const int len = (int)strlen(external); 3471 for (int split = 0; split <= len; ++split) { 3472 set_subtest("split at byte %d", split); 3473 3474 struct bom_testdata testdata; 3475 testdata.external = external; 3476 testdata.split = split; 3477 testdata.nested_callback_happened = XML_FALSE; 3478 3479 XML_Parser parser = XML_ParserCreate(NULL); 3480 if (parser == NULL) { 3481 fail("Couldn't create parser"); 3482 } 3483 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3484 XML_SetExternalEntityRefHandler(parser, external_bom_checker); 3485 XML_SetUserData(parser, &testdata); 3486 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 3487 == XML_STATUS_ERROR) 3488 xml_failure(parser); 3489 if (! 
testdata.nested_callback_happened) { 3490 fail("ref handler not called"); 3491 } 3492 XML_ParserFree(parser); 3493 } 3494 } 3495 END_TEST 3496 3497 /* Test recursive parsing */ 3498 START_TEST(test_external_entity_values) { 3499 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3500 "<doc></doc>\n"; 3501 ExtFaults data_004_2[] = { 3502 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE}, 3503 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL, 3504 XML_ERROR_INVALID_TOKEN}, 3505 {"'wombat", "Unterminated string not faulted", NULL, 3506 XML_ERROR_UNCLOSED_TOKEN}, 3507 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL, 3508 XML_ERROR_PARTIAL_CHAR}, 3509 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE}, 3510 {"<?xml?>", "Malformed XML declaration not faulted", NULL, 3511 XML_ERROR_XML_DECL}, 3512 {/* UTF-8 BOM */ 3513 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, 3514 XML_ERROR_NONE}, 3515 {"<?xml version='1.0' encoding='utf-8'?>\n$", 3516 "Invalid token after text declaration not faulted", NULL, 3517 XML_ERROR_INVALID_TOKEN}, 3518 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat", 3519 "Unterminated string after text decl not faulted", NULL, 3520 XML_ERROR_UNCLOSED_TOKEN}, 3521 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82", 3522 "Partial UTF-8 character after text decl not faulted", NULL, 3523 XML_ERROR_PARTIAL_CHAR}, 3524 {"%e1;", "Recursive parameter entity not faulted", NULL, 3525 XML_ERROR_RECURSIVE_ENTITY_REF}, 3526 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3527 int i; 3528 3529 for (i = 0; data_004_2[i].parse_text != NULL; i++) { 3530 set_subtest("%s", data_004_2[i].parse_text); 3531 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3532 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); 3533 XML_SetUserData(g_parser, &data_004_2[i]); 3534 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3535 == XML_STATUS_ERROR) 3536 
xml_failure(g_parser); 3537 XML_ParserReset(g_parser, NULL); 3538 } 3539 } 3540 END_TEST 3541 3542 /* Test the recursive parse interacts with a not standalone handler */ 3543 START_TEST(test_ext_entity_not_standalone) { 3544 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3545 "<doc></doc>"; 3546 3547 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3548 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone); 3549 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3550 "Standalone rejection not caught"); 3551 } 3552 END_TEST 3553 3554 START_TEST(test_ext_entity_value_abort) { 3555 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3556 "<doc></doc>\n"; 3557 3558 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3559 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter); 3560 g_resumable = XML_FALSE; 3561 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3562 == XML_STATUS_ERROR) 3563 xml_failure(g_parser); 3564 } 3565 END_TEST 3566 3567 START_TEST(test_bad_public_doctype) { 3568 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 3569 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n" 3570 "<doc></doc>"; 3571 3572 /* Setting a handler provokes a particular code path */ 3573 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, 3574 dummy_end_doctype_handler); 3575 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); 3576 } 3577 END_TEST 3578 3579 /* Test based on ibm/valid/P32/ibm32v04.xml */ 3580 START_TEST(test_attribute_enum_value) { 3581 const char *text = "<?xml version='1.0' standalone='no'?>\n" 3582 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" 3583 "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; 3584 ExtTest dtd_data 3585 = {"<!ELEMENT animal (#PCDATA|a)*>\n" 3586 "<!ELEMENT a EMPTY>\n" 3587 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>", 3588 NULL, NULL}; 3589 const XML_Char *expected = XCS("This 
is a \n \n\nyellow tiger"); 3590 3591 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3592 XML_SetUserData(g_parser, &dtd_data); 3593 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3594 /* An attribute list handler provokes a different code path */ 3595 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 3596 run_ext_character_check(text, &dtd_data, expected); 3597 } 3598 END_TEST 3599 3600 /* Slightly bizarrely, the library seems to silently ignore entity 3601 * definitions for predefined entities, even when they are wrong. The 3602 * language of the XML 1.0 spec is somewhat unhelpful as to what ought 3603 * to happen, so this is currently treated as acceptable. 3604 */ 3605 START_TEST(test_predefined_entity_redefinition) { 3606 const char *text = "<!DOCTYPE doc [\n" 3607 "<!ENTITY apos 'foo'>\n" 3608 "]>\n" 3609 "<doc>'</doc>"; 3610 run_character_check(text, XCS("'")); 3611 } 3612 END_TEST 3613 3614 /* Test that the parser stops processing the DTD after an unresolved 3615 * parameter entity is encountered. 
3616 */ 3617 START_TEST(test_dtd_stop_processing) { 3618 const char *text = "<!DOCTYPE doc [\n" 3619 "%foo;\n" 3620 "<!ENTITY bar 'bas'>\n" 3621 "]><doc/>"; 3622 3623 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 3624 init_dummy_handlers(); 3625 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3626 == XML_STATUS_ERROR) 3627 xml_failure(g_parser); 3628 if (get_dummy_handler_flags() != 0) 3629 fail("DTD processing still going after undefined PE"); 3630 } 3631 END_TEST 3632 3633 /* Test public notations with no system ID */ 3634 START_TEST(test_public_notation_no_sysid) { 3635 const char *text = "<!DOCTYPE doc [\n" 3636 "<!NOTATION note PUBLIC 'foo'>\n" 3637 "<!ELEMENT doc EMPTY>\n" 3638 "]>\n<doc/>"; 3639 3640 init_dummy_handlers(); 3641 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 3642 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3643 == XML_STATUS_ERROR) 3644 xml_failure(g_parser); 3645 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG) 3646 fail("Notation declaration handler not called"); 3647 } 3648 END_TEST 3649 3650 START_TEST(test_nested_groups) { 3651 const char *text 3652 = "<!DOCTYPE doc [\n" 3653 "<!ELEMENT doc " 3654 /* Sixteen elements per line */ 3655 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," 3656 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" 
3657 "))))))))))))))))))))))))))))))))>\n" 3658 "<!ELEMENT e EMPTY>" 3659 "]>\n" 3660 "<doc><e/></doc>"; 3661 CharData storage; 3662 3663 CharData_Init(&storage); 3664 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3665 XML_SetStartElementHandler(g_parser, record_element_start_handler); 3666 XML_SetUserData(g_parser, &storage); 3667 init_dummy_handlers(); 3668 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3669 == XML_STATUS_ERROR) 3670 xml_failure(g_parser); 3671 CharData_CheckXMLChars(&storage, XCS("doce")); 3672 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3673 fail("Element handler not fired"); 3674 } 3675 END_TEST 3676 3677 START_TEST(test_group_choice) { 3678 const char *text = "<!DOCTYPE doc [\n" 3679 "<!ELEMENT doc (a|b|c)+>\n" 3680 "<!ELEMENT a EMPTY>\n" 3681 "<!ELEMENT b (#PCDATA)>\n" 3682 "<!ELEMENT c ANY>\n" 3683 "]>\n" 3684 "<doc>\n" 3685 "<a/>\n" 3686 "<b attr='foo'>This is a foo</b>\n" 3687 "<c></c>\n" 3688 "</doc>\n"; 3689 3690 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3691 init_dummy_handlers(); 3692 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3693 == XML_STATUS_ERROR) 3694 xml_failure(g_parser); 3695 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3696 fail("Element handler flag not raised"); 3697 } 3698 END_TEST 3699 3700 START_TEST(test_standalone_parameter_entity) { 3701 const char *text = "<?xml version='1.0' standalone='yes'?>\n" 3702 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n" 3703 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n" 3704 "%entity;\n" 3705 "]>\n" 3706 "<doc></doc>"; 3707 char dtd_data[] = "<!ENTITY % e1 'foo'>\n"; 3708 3709 XML_SetUserData(g_parser, dtd_data); 3710 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3711 XML_SetExternalEntityRefHandler(g_parser, external_entity_public); 3712 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3713 == 
XML_STATUS_ERROR) 3714 xml_failure(g_parser); 3715 } 3716 END_TEST 3717 3718 /* Test skipping of parameter entity in an external DTD */ 3719 /* Derived from ibm/invalid/P69/ibm69i01.xml */ 3720 START_TEST(test_skipped_parameter_entity) { 3721 const char *text = "<?xml version='1.0'?>\n" 3722 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3723 "<!ELEMENT root (#PCDATA|a)* >\n" 3724 "]>\n" 3725 "<root></root>"; 3726 ExtTest dtd_data = {"%pe2;", NULL, NULL}; 3727 3728 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3729 XML_SetUserData(g_parser, &dtd_data); 3730 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3731 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler); 3732 init_dummy_handlers(); 3733 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3734 == XML_STATUS_ERROR) 3735 xml_failure(g_parser); 3736 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG) 3737 fail("Skip handler not executed"); 3738 } 3739 END_TEST 3740 3741 /* Test recursive parameter entity definition rejected in external DTD */ 3742 START_TEST(test_recursive_external_parameter_entity) { 3743 const char *text = "<?xml version='1.0'?>\n" 3744 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3745 "<!ELEMENT root (#PCDATA|a)* >\n" 3746 "]>\n" 3747 "<root></root>"; 3748 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;", 3749 "Recursive external parameter entity not faulted", NULL, 3750 XML_ERROR_RECURSIVE_ENTITY_REF}; 3751 3752 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3753 XML_SetUserData(g_parser, &dtd_data); 3754 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3755 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3756 "Recursive external parameter not spotted"); 3757 } 3758 END_TEST 3759 3760 /* Test undefined parameter entity in external entity handler */ 3761 START_TEST(test_undefined_ext_entity_in_external_dtd) { 3762 const char *text = 
"<!DOCTYPE doc SYSTEM 'foo'>\n" 3763 "<doc></doc>\n"; 3764 3765 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3766 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3767 XML_SetUserData(g_parser, NULL); 3768 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3769 == XML_STATUS_ERROR) 3770 xml_failure(g_parser); 3771 3772 /* Now repeat without the external entity ref handler invoking 3773 * another copy of itself. 3774 */ 3775 XML_ParserReset(g_parser, NULL); 3776 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3777 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3778 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */ 3779 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3780 == XML_STATUS_ERROR) 3781 xml_failure(g_parser); 3782 } 3783 END_TEST 3784 3785 /* Test suspending the parse on receiving an XML declaration works */ 3786 START_TEST(test_suspend_xdecl) { 3787 const char *text = long_character_data_text; 3788 3789 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler); 3790 XML_SetUserData(g_parser, g_parser); 3791 g_resumable = XML_TRUE; 3792 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 3793 // we won't know exactly how much input we actually managed to give Expat. 
3794 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3795 != XML_STATUS_SUSPENDED) 3796 xml_failure(g_parser); 3797 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 3798 xml_failure(g_parser); 3799 /* Attempt to start a new parse while suspended */ 3800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3801 != XML_STATUS_ERROR) 3802 fail("Attempt to parse while suspended not faulted"); 3803 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 3804 fail("Suspended parse not faulted with correct error"); 3805 } 3806 END_TEST 3807 3808 /* Test aborting the parse in an epilog works */ 3809 START_TEST(test_abort_epilog) { 3810 const char *text = "<doc></doc>\n\r\n"; 3811 XML_Char trigger_char = XCS('\r'); 3812 3813 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3814 XML_SetUserData(g_parser, &trigger_char); 3815 g_resumable = XML_FALSE; 3816 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3817 != XML_STATUS_ERROR) 3818 fail("Abort not triggered"); 3819 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 3820 xml_failure(g_parser); 3821 } 3822 END_TEST 3823 3824 /* Test a different code path for abort in the epilog */ 3825 START_TEST(test_abort_epilog_2) { 3826 const char *text = "<doc></doc>\n"; 3827 XML_Char trigger_char = XCS('\n'); 3828 3829 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3830 XML_SetUserData(g_parser, &trigger_char); 3831 g_resumable = XML_FALSE; 3832 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); 3833 } 3834 END_TEST 3835 3836 /* Test suspension from the epilog */ 3837 START_TEST(test_suspend_epilog) { 3838 const char *text = "<doc></doc>\n"; 3839 XML_Char trigger_char = XCS('\n'); 3840 3841 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3842 XML_SetUserData(g_parser, &trigger_char); 3843 g_resumable = XML_TRUE; 3844 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3845 != 
XML_STATUS_SUSPENDED) 3846 xml_failure(g_parser); 3847 } 3848 END_TEST 3849 3850 START_TEST(test_suspend_in_sole_empty_tag) { 3851 const char *text = "<doc/>"; 3852 enum XML_Status rc; 3853 3854 XML_SetEndElementHandler(g_parser, suspending_end_handler); 3855 XML_SetUserData(g_parser, g_parser); 3856 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 3857 if (rc == XML_STATUS_ERROR) 3858 xml_failure(g_parser); 3859 else if (rc != XML_STATUS_SUSPENDED) 3860 fail("Suspend not triggered"); 3861 rc = XML_ResumeParser(g_parser); 3862 if (rc == XML_STATUS_ERROR) 3863 xml_failure(g_parser); 3864 else if (rc != XML_STATUS_OK) 3865 fail("Resume failed"); 3866 } 3867 END_TEST 3868 3869 START_TEST(test_unfinished_epilog) { 3870 const char *text = "<doc></doc><"; 3871 3872 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, 3873 "Incomplete epilog entry not faulted"); 3874 } 3875 END_TEST 3876 3877 START_TEST(test_partial_char_in_epilog) { 3878 const char *text = "<doc></doc>\xe2\x82"; 3879 3880 /* First check that no fault is raised if the parse is not finished */ 3881 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3882 == XML_STATUS_ERROR) 3883 xml_failure(g_parser); 3884 /* Now check that it is faulted once we finish */ 3885 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR) 3886 fail("Partial character in epilog not faulted"); 3887 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR) 3888 xml_failure(g_parser); 3889 } 3890 END_TEST 3891 3892 /* Test resuming a parse suspended in entity substitution */ 3893 START_TEST(test_suspend_resume_internal_entity) { 3894 const char *text 3895 = "<!DOCTYPE doc [\n" 3896 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n" 3897 "]>\n" 3898 "<doc>&foo;</doc>\n"; 3899 const XML_Char *expected1 = XCS("Hi"); 3900 const XML_Char *expected2 = XCS("HiHo"); 3901 CharData storage; 3902 3903 CharData_Init(&storage); 3904 XML_SetStartElementHandler(g_parser, 
start_element_suspender); 3905 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 3906 XML_SetUserData(g_parser, &storage); 3907 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 3908 // we won't know exactly how much input we actually managed to give Expat. 3909 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3910 != XML_STATUS_SUSPENDED) 3911 xml_failure(g_parser); 3912 CharData_CheckXMLChars(&storage, XCS("")); 3913 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 3914 xml_failure(g_parser); 3915 CharData_CheckXMLChars(&storage, expected1); 3916 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 3917 xml_failure(g_parser); 3918 CharData_CheckXMLChars(&storage, expected2); 3919 } 3920 END_TEST 3921 3922 START_TEST(test_suspend_resume_internal_entity_issue_629) { 3923 const char *const text 3924 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n" 3925 "<" 3926 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3927 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3928 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3929 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3930 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3931 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3932 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3933 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3934 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3935 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3936 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3937 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3938 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3939 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3940 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3941 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3942 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3943 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3944 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3945 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3946 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3947 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3948 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3949 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3950 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3951 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3952 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3953 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3954 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3955 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3956 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3957 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3958 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3959 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3960 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3961 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3962 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3963 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3964 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3965 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3966 "/>" 3967 "</b></a>"; 3968 const size_t firstChunkSizeBytes = 54; 3969 3970 XML_Parser parser = XML_ParserCreate(NULL); 3971 XML_SetUserData(parser, parser); 3972 XML_SetCommentHandler(parser, suspending_comment_handler); 3973 3974 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE) 3975 != XML_STATUS_SUSPENDED) 3976 xml_failure(parser); 3977 if (XML_ResumeParser(parser) != XML_STATUS_OK) 3978 xml_failure(parser); 3979 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes, 3980 (int)(strlen(text) - firstChunkSizeBytes), 3981 XML_TRUE) 3982 != XML_STATUS_OK) 3983 xml_failure(parser); 3984 XML_ParserFree(parser); 3985 } 3986 END_TEST 3987 3988 /* Test syntax error is caught at parse resumption 
*/ 3989 START_TEST(test_resume_entity_with_syntax_error) { 3990 if (g_chunkSize != 0) { 3991 // this test does not use SINGLE_BYTES, because of suspension 3992 return; 3993 } 3994 3995 const char *text = "<!DOCTYPE doc [\n" 3996 "<!ENTITY foo '<suspend>Hi</wombat>'>\n" 3997 "]>\n" 3998 "<doc>&foo;</doc>\n"; 3999 4000 XML_SetStartElementHandler(g_parser, start_element_suspender); 4001 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 4002 // we won't know exactly how much input we actually managed to give Expat. 4003 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4004 != XML_STATUS_SUSPENDED) 4005 xml_failure(g_parser); 4006 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) 4007 fail("Syntax error in entity not faulted"); 4008 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH) 4009 xml_failure(g_parser); 4010 } 4011 END_TEST 4012 4013 /* Test suspending and resuming in a parameter entity substitution */ 4014 START_TEST(test_suspend_resume_parameter_entity) { 4015 const char *text = "<!DOCTYPE doc [\n" 4016 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n" 4017 "%foo;\n" 4018 "]>\n" 4019 "<doc>Hello, world</doc>"; 4020 const XML_Char *expected = XCS("Hello, world"); 4021 CharData storage; 4022 4023 CharData_Init(&storage); 4024 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4025 XML_SetElementDeclHandler(g_parser, element_decl_suspender); 4026 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 4027 XML_SetUserData(g_parser, &storage); 4028 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4029 != XML_STATUS_SUSPENDED) 4030 xml_failure(g_parser); 4031 CharData_CheckXMLChars(&storage, XCS("")); 4032 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 4033 xml_failure(g_parser); 4034 CharData_CheckXMLChars(&storage, expected); 4035 } 4036 END_TEST 4037 4038 /* Test attempting to use parser after an error is faulted */ 4039 START_TEST(test_restart_on_error) { 4040 const char *text = 
"<$doc><doc></doc>"; 4041 4042 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4043 != XML_STATUS_ERROR) 4044 fail("Invalid tag name not faulted"); 4045 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 4046 xml_failure(g_parser); 4047 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 4048 fail("Restarting invalid parse not faulted"); 4049 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 4050 xml_failure(g_parser); 4051 } 4052 END_TEST 4053 4054 /* Test that angle brackets in an attribute default value are faulted */ 4055 START_TEST(test_reject_lt_in_attribute_value) { 4056 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n" 4057 "<doc></doc>"; 4058 4059 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4060 "Bad attribute default not faulted"); 4061 } 4062 END_TEST 4063 4064 START_TEST(test_reject_unfinished_param_in_att_value) { 4065 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n" 4066 "<doc></doc>"; 4067 4068 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4069 "Bad attribute default not faulted"); 4070 } 4071 END_TEST 4072 4073 START_TEST(test_trailing_cr_in_att_value) { 4074 const char *text = "<doc a='value\r'/>"; 4075 4076 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4077 == XML_STATUS_ERROR) 4078 xml_failure(g_parser); 4079 } 4080 END_TEST 4081 4082 /* Try parsing a general entity within a parameter entity in a 4083 * standalone internal DTD. Covers a corner case in the parser. 
4084 */ 4085 START_TEST(test_standalone_internal_entity) { 4086 const char *text = "<?xml version='1.0' standalone='yes' ?>\n" 4087 "<!DOCTYPE doc [\n" 4088 " <!ELEMENT doc (#PCDATA)>\n" 4089 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n" 4090 " <!ENTITY ge 'AttDefaultValue'>\n" 4091 " %pe;\n" 4092 "]>\n" 4093 "<doc att2='any'/>"; 4094 4095 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4096 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4097 == XML_STATUS_ERROR) 4098 xml_failure(g_parser); 4099 } 4100 END_TEST 4101 4102 /* Test that a reference to an unknown external entity is skipped */ 4103 START_TEST(test_skipped_external_entity) { 4104 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 4105 "<doc></doc>\n"; 4106 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n" 4107 "<!ENTITY % e2 '%e1;'>\n", 4108 NULL, NULL}; 4109 4110 XML_SetUserData(g_parser, &test_data); 4111 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4112 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4113 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4114 == XML_STATUS_ERROR) 4115 xml_failure(g_parser); 4116 } 4117 END_TEST 4118 4119 /* Test a different form of unknown external entity */ 4120 START_TEST(test_skipped_null_loaded_ext_entity) { 4121 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 4122 "<doc />"; 4123 ExtHdlrData test_data 4124 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 4125 "<!ENTITY % pe2 '%pe1;'>\n" 4126 "%pe2;\n", 4127 external_entity_null_loader, NULL}; 4128 4129 XML_SetUserData(g_parser, &test_data); 4130 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4131 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 4132 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4133 == XML_STATUS_ERROR) 4134 xml_failure(g_parser); 4135 } 4136 END_TEST 
4137 4138 START_TEST(test_skipped_unloaded_ext_entity) { 4139 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 4140 "<doc />"; 4141 ExtHdlrData test_data 4142 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 4143 "<!ENTITY % pe2 '%pe1;'>\n" 4144 "%pe2;\n", 4145 NULL, NULL}; 4146 4147 XML_SetUserData(g_parser, &test_data); 4148 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4149 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 4150 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4151 == XML_STATUS_ERROR) 4152 xml_failure(g_parser); 4153 } 4154 END_TEST 4155 4156 /* Test that a parameter entity value ending with a carriage return 4157 * has it translated internally into a newline. 4158 */ 4159 START_TEST(test_param_entity_with_trailing_cr) { 4160 #define PARAM_ENTITY_NAME "pe" 4161 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">" 4162 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 4163 "<doc/>"; 4164 ExtTest test_data 4165 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n" 4166 "%" PARAM_ENTITY_NAME ";\n", 4167 NULL, NULL}; 4168 4169 XML_SetUserData(g_parser, &test_data); 4170 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4171 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4172 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler); 4173 param_entity_match_init(XCS(PARAM_ENTITY_NAME), 4174 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n")); 4175 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4176 == XML_STATUS_ERROR) 4177 xml_failure(g_parser); 4178 int entity_match_flag = get_param_entity_match_flag(); 4179 if (entity_match_flag == ENTITY_MATCH_FAIL) 4180 fail("Parameter entity CR->NEWLINE conversion failed"); 4181 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) 4182 fail("Parameter entity not parsed"); 4183 } 4184 #undef 
PARAM_ENTITY_NAME 4185 #undef PARAM_ENTITY_CORE_VALUE 4186 END_TEST 4187 4188 START_TEST(test_invalid_character_entity) { 4189 const char *text = "<!DOCTYPE doc [\n" 4190 " <!ENTITY entity '�'>\n" 4191 "]>\n" 4192 "<doc>&entity;</doc>"; 4193 4194 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4195 "Out of range character reference not faulted"); 4196 } 4197 END_TEST 4198 4199 START_TEST(test_invalid_character_entity_2) { 4200 const char *text = "<!DOCTYPE doc [\n" 4201 " <!ENTITY entity '&#xg0;'>\n" 4202 "]>\n" 4203 "<doc>&entity;</doc>"; 4204 4205 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4206 "Out of range character reference not faulted"); 4207 } 4208 END_TEST 4209 4210 START_TEST(test_invalid_character_entity_3) { 4211 const char text[] = 4212 /* <!DOCTYPE doc [\n */ 4213 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 4214 /* U+0E04 = KHO KHWAI 4215 * U+0E08 = CHO CHAN */ 4216 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */ 4217 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " 4218 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" 4219 /* ]>\n */ 4220 "\0]\0>\0\n" 4221 /* <doc>&entity;</doc> */ 4222 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; 4223 4224 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4225 != XML_STATUS_ERROR) 4226 fail("Invalid start of entity name not faulted"); 4227 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY) 4228 xml_failure(g_parser); 4229 } 4230 END_TEST 4231 4232 START_TEST(test_invalid_character_entity_4) { 4233 const char *text = "<!DOCTYPE doc [\n" 4234 " <!ENTITY entity '�'>\n" /* = � */ 4235 "]>\n" 4236 "<doc>&entity;</doc>"; 4237 4238 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4239 "Out of range character reference not faulted"); 4240 } 4241 END_TEST 4242 4243 /* Test that processing instructions are picked up by a default handler */ 4244 START_TEST(test_pi_handled_in_default) { 4245 const char *text = "<?test processing instruction?>\n<doc/>"; 4246 const XML_Char *expected = 
XCS("<?test processing instruction?>\n<doc/>"); 4247 CharData storage; 4248 4249 CharData_Init(&storage); 4250 XML_SetDefaultHandler(g_parser, accumulate_characters); 4251 XML_SetUserData(g_parser, &storage); 4252 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4253 == XML_STATUS_ERROR) 4254 xml_failure(g_parser); 4255 CharData_CheckXMLChars(&storage, expected); 4256 } 4257 END_TEST 4258 4259 /* Test that comments are picked up by a default handler */ 4260 START_TEST(test_comment_handled_in_default) { 4261 const char *text = "<!-- This is a comment -->\n<doc/>"; 4262 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>"); 4263 CharData storage; 4264 4265 CharData_Init(&storage); 4266 XML_SetDefaultHandler(g_parser, accumulate_characters); 4267 XML_SetUserData(g_parser, &storage); 4268 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4269 == XML_STATUS_ERROR) 4270 xml_failure(g_parser); 4271 CharData_CheckXMLChars(&storage, expected); 4272 } 4273 END_TEST 4274 4275 /* Test PIs that look almost but not quite like XML declarations */ 4276 START_TEST(test_pi_yml) { 4277 const char *text = "<?yml something like data?><doc/>"; 4278 const XML_Char *expected = XCS("yml: something like data\n"); 4279 CharData storage; 4280 4281 CharData_Init(&storage); 4282 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4283 XML_SetUserData(g_parser, &storage); 4284 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4285 == XML_STATUS_ERROR) 4286 xml_failure(g_parser); 4287 CharData_CheckXMLChars(&storage, expected); 4288 } 4289 END_TEST 4290 4291 START_TEST(test_pi_xnl) { 4292 const char *text = "<?xnl nothing like data?><doc/>"; 4293 const XML_Char *expected = XCS("xnl: nothing like data\n"); 4294 CharData storage; 4295 4296 CharData_Init(&storage); 4297 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4298 XML_SetUserData(g_parser, &storage); 4299 if 
(_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4300 == XML_STATUS_ERROR) 4301 xml_failure(g_parser); 4302 CharData_CheckXMLChars(&storage, expected); 4303 } 4304 END_TEST 4305 4306 START_TEST(test_pi_xmm) { 4307 const char *text = "<?xmm everything like data?><doc/>"; 4308 const XML_Char *expected = XCS("xmm: everything like data\n"); 4309 CharData storage; 4310 4311 CharData_Init(&storage); 4312 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4313 XML_SetUserData(g_parser, &storage); 4314 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4315 == XML_STATUS_ERROR) 4316 xml_failure(g_parser); 4317 CharData_CheckXMLChars(&storage, expected); 4318 } 4319 END_TEST 4320 4321 START_TEST(test_utf16_pi) { 4322 const char text[] = 4323 /* <?{KHO KHWAI}{CHO CHAN}?> 4324 * where {KHO KHWAI} = U+0E04 4325 * and {CHO CHAN} = U+0E08 4326 */ 4327 "<\0?\0\x04\x0e\x08\x0e?\0>\0" 4328 /* <q/> */ 4329 "<\0q\0/\0>\0"; 4330 #ifdef XML_UNICODE 4331 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4332 #else 4333 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4334 #endif 4335 CharData storage; 4336 4337 CharData_Init(&storage); 4338 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4339 XML_SetUserData(g_parser, &storage); 4340 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4341 == XML_STATUS_ERROR) 4342 xml_failure(g_parser); 4343 CharData_CheckXMLChars(&storage, expected); 4344 } 4345 END_TEST 4346 4347 START_TEST(test_utf16_be_pi) { 4348 const char text[] = 4349 /* <?{KHO KHWAI}{CHO CHAN}?> 4350 * where {KHO KHWAI} = U+0E04 4351 * and {CHO CHAN} = U+0E08 4352 */ 4353 "\0<\0?\x0e\x04\x0e\x08\0?\0>" 4354 /* <q/> */ 4355 "\0<\0q\0/\0>"; 4356 #ifdef XML_UNICODE 4357 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4358 #else 4359 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4360 #endif 4361 CharData storage; 4362 4363 
CharData_Init(&storage); 4364 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4365 XML_SetUserData(g_parser, &storage); 4366 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4367 == XML_STATUS_ERROR) 4368 xml_failure(g_parser); 4369 CharData_CheckXMLChars(&storage, expected); 4370 } 4371 END_TEST 4372 4373 /* Test that comments can be picked up and translated */ 4374 START_TEST(test_utf16_be_comment) { 4375 const char text[] = 4376 /* <!-- Comment A --> */ 4377 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" 4378 /* <doc/> */ 4379 "\0<\0d\0o\0c\0/\0>"; 4380 const XML_Char *expected = XCS(" Comment A "); 4381 CharData storage; 4382 4383 CharData_Init(&storage); 4384 XML_SetCommentHandler(g_parser, accumulate_comment); 4385 XML_SetUserData(g_parser, &storage); 4386 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4387 == XML_STATUS_ERROR) 4388 xml_failure(g_parser); 4389 CharData_CheckXMLChars(&storage, expected); 4390 } 4391 END_TEST 4392 4393 START_TEST(test_utf16_le_comment) { 4394 const char text[] = 4395 /* <!-- Comment B --> */ 4396 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" 4397 /* <doc/> */ 4398 "<\0d\0o\0c\0/\0>\0"; 4399 const XML_Char *expected = XCS(" Comment B "); 4400 CharData storage; 4401 4402 CharData_Init(&storage); 4403 XML_SetCommentHandler(g_parser, accumulate_comment); 4404 XML_SetUserData(g_parser, &storage); 4405 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4406 == XML_STATUS_ERROR) 4407 xml_failure(g_parser); 4408 CharData_CheckXMLChars(&storage, expected); 4409 } 4410 END_TEST 4411 4412 /* Test that the unknown encoding handler with map entries that expect 4413 * conversion but no conversion function is faulted 4414 */ 4415 START_TEST(test_missing_encoding_conversion_fn) { 4416 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n" 4417 "<doc>\x81</doc>"; 4418 4419 
XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4420 /* MiscEncodingHandler sets up an encoding with every top-bit-set 4421 * character introducing a two-byte sequence. For this, it 4422 * requires a convert function. The above function call doesn't 4423 * pass one through, so when BadEncodingHandler actually gets 4424 * called it should supply an invalid encoding. 4425 */ 4426 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4427 "Encoding with missing convert() not faulted"); 4428 } 4429 END_TEST 4430 4431 START_TEST(test_failing_encoding_conversion_fn) { 4432 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n" 4433 "<doc>\x81</doc>"; 4434 4435 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4436 /* BadEncodingHandler sets up an encoding with every top-bit-set 4437 * character introducing a two-byte sequence. For this, it 4438 * requires a convert function. The above function call passes 4439 * one that insists all possible sequences are invalid anyway. 
4440 */ 4441 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4442 "Encoding with failing convert() not faulted"); 4443 } 4444 END_TEST 4445 4446 /* Test unknown encoding conversions */ 4447 START_TEST(test_unknown_encoding_success) { 4448 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4449 /* Equivalent to <eoc>Hello, world</eoc> */ 4450 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>"; 4451 4452 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4453 run_character_check(text, XCS("Hello, world")); 4454 } 4455 END_TEST 4456 4457 /* Test bad name character in unknown encoding */ 4458 START_TEST(test_unknown_encoding_bad_name) { 4459 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4460 "<\xff\x64oc>Hello, world</\xff\x64oc>"; 4461 4462 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4463 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4464 "Bad name start in unknown encoding not faulted"); 4465 } 4466 END_TEST 4467 4468 /* Test bad mid-name character in unknown encoding */ 4469 START_TEST(test_unknown_encoding_bad_name_2) { 4470 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4471 "<d\xffoc>Hello, world</d\xffoc>"; 4472 4473 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4474 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4475 "Bad name in unknown encoding not faulted"); 4476 } 4477 END_TEST 4478 4479 /* Test element name that is long enough to fill the conversion buffer 4480 * in an unknown encoding, finishing with an encoded character. 
4481 */ 4482 START_TEST(test_unknown_encoding_long_name_1) { 4483 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4484 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>" 4485 "Hi" 4486 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"; 4487 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4488 CharData storage; 4489 4490 CharData_Init(&storage); 4491 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4492 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4493 XML_SetUserData(g_parser, &storage); 4494 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4495 == XML_STATUS_ERROR) 4496 xml_failure(g_parser); 4497 CharData_CheckXMLChars(&storage, expected); 4498 } 4499 END_TEST 4500 4501 /* Test element name that is long enough to fill the conversion buffer 4502 * in an unknown encoding, finishing with an simple character. 4503 */ 4504 START_TEST(test_unknown_encoding_long_name_2) { 4505 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4506 "<abcdefghabcdefghabcdefghijklmnop>" 4507 "Hi" 4508 "</abcdefghabcdefghabcdefghijklmnop>"; 4509 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4510 CharData storage; 4511 4512 CharData_Init(&storage); 4513 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4514 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4515 XML_SetUserData(g_parser, &storage); 4516 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4517 == XML_STATUS_ERROR) 4518 xml_failure(g_parser); 4519 CharData_CheckXMLChars(&storage, expected); 4520 } 4521 END_TEST 4522 4523 START_TEST(test_invalid_unknown_encoding) { 4524 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n" 4525 "<doc>Hello world</doc>"; 4526 4527 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4528 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4529 "Invalid unknown 
encoding not faulted"); 4530 } 4531 END_TEST 4532 4533 START_TEST(test_unknown_ascii_encoding_ok) { 4534 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4535 "<doc>Hello, world</doc>"; 4536 4537 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4538 run_character_check(text, XCS("Hello, world")); 4539 } 4540 END_TEST 4541 4542 START_TEST(test_unknown_ascii_encoding_fail) { 4543 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4544 "<doc>Hello, \x80 world</doc>"; 4545 4546 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4547 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4548 "Invalid character not faulted"); 4549 } 4550 END_TEST 4551 4552 START_TEST(test_unknown_encoding_invalid_length) { 4553 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n" 4554 "<doc>Hello, world</doc>"; 4555 4556 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4557 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4558 "Invalid unknown encoding not faulted"); 4559 } 4560 END_TEST 4561 4562 START_TEST(test_unknown_encoding_invalid_topbit) { 4563 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n" 4564 "<doc>Hello, world</doc>"; 4565 4566 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4567 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4568 "Invalid unknown encoding not faulted"); 4569 } 4570 END_TEST 4571 4572 START_TEST(test_unknown_encoding_invalid_surrogate) { 4573 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n" 4574 "<doc>Hello, \x82 world</doc>"; 4575 4576 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4577 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4578 "Invalid unknown encoding not faulted"); 4579 } 4580 END_TEST 4581 4582 START_TEST(test_unknown_encoding_invalid_high) { 4583 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n" 4584 "<doc>Hello, world</doc>"; 4585 4586 
XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4587 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4588 "Invalid unknown encoding not faulted"); 4589 } 4590 END_TEST 4591 4592 START_TEST(test_unknown_encoding_invalid_attr_value) { 4593 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4594 "<doc attr='\xff\x30'/>"; 4595 4596 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4597 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4598 "Invalid attribute valid not faulted"); 4599 } 4600 END_TEST 4601 4602 START_TEST(test_unknown_encoding_user_data_primary) { 4603 // This test is based on ideas contributed by Artiphishell Inc. 4604 const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n" 4605 "<root />\n"; 4606 XML_Parser parser = XML_ParserCreate(NULL); 4607 XML_SetUnknownEncodingHandler(parser, 4608 user_data_checking_unknown_encoding_handler, 4609 (void *)(intptr_t)0xC0FFEE); 4610 4611 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 4612 == XML_STATUS_OK); 4613 4614 XML_ParserFree(parser); 4615 } 4616 END_TEST 4617 4618 START_TEST(test_unknown_encoding_user_data_secondary) { 4619 // This test is based on ideas contributed by Artiphishell Inc. 
4620 const char *const text_main = "<!DOCTYPE r [\n" 4621 " <!ENTITY ext SYSTEM 'ext.ent'>\n" 4622 "]>\n" 4623 "<r>&ext;</r>\n"; 4624 const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n" 4625 "<e>data</e>"; 4626 ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL}; 4627 XML_Parser parser = XML_ParserCreate(NULL); 4628 XML_SetExternalEntityRefHandler(parser, external_entity_loader2); 4629 XML_SetUnknownEncodingHandler(parser, 4630 user_data_checking_unknown_encoding_handler, 4631 (void *)(intptr_t)0xC0FFEE); 4632 XML_SetUserData(parser, &test_data); 4633 4634 assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main), 4635 XML_TRUE) 4636 == XML_STATUS_OK); 4637 4638 XML_ParserFree(parser); 4639 } 4640 END_TEST 4641 4642 /* Test an external entity parser set to use latin-1 detects UTF-16 4643 * BOMs correctly. 4644 */ 4645 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ 4646 START_TEST(test_ext_entity_latin1_utf16le_bom) { 4647 const char *text = "<!DOCTYPE doc [\n" 4648 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4649 "]>\n" 4650 "<doc>&en;</doc>"; 4651 ExtTest2 test_data 4652 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4653 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4654 * 0x4c = L and 0x20 is a space 4655 */ 4656 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4657 #ifdef XML_UNICODE 4658 const XML_Char *expected = XCS("\x00ff\x00feL "); 4659 #else 4660 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4661 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4662 #endif 4663 CharData storage; 4664 4665 CharData_Init(&storage); 4666 test_data.storage = &storage; 4667 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4668 XML_SetUserData(g_parser, &test_data); 4669 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4670 if (_XML_Parse_SINGLE_BYTES(g_parser, text, 
(int)strlen(text), XML_TRUE) 4671 == XML_STATUS_ERROR) 4672 xml_failure(g_parser); 4673 CharData_CheckXMLChars(&storage, expected); 4674 } 4675 END_TEST 4676 4677 START_TEST(test_ext_entity_latin1_utf16be_bom) { 4678 const char *text = "<!DOCTYPE doc [\n" 4679 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4680 "]>\n" 4681 "<doc>&en;</doc>"; 4682 ExtTest2 test_data 4683 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4684 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4685 * 0x4c = L and 0x20 is a space 4686 */ 4687 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4688 #ifdef XML_UNICODE 4689 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4690 #else 4691 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4692 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L"); 4693 #endif 4694 CharData storage; 4695 4696 CharData_Init(&storage); 4697 test_data.storage = &storage; 4698 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4699 XML_SetUserData(g_parser, &test_data); 4700 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4701 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4702 == XML_STATUS_ERROR) 4703 xml_failure(g_parser); 4704 CharData_CheckXMLChars(&storage, expected); 4705 } 4706 END_TEST 4707 4708 /* Parsing the full buffer rather than a byte at a time makes a 4709 * difference to the encoding scanning code, so repeat the above tests 4710 * without breaking them down by byte. 
4711 */ 4712 START_TEST(test_ext_entity_latin1_utf16le_bom2) { 4713 const char *text = "<!DOCTYPE doc [\n" 4714 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4715 "]>\n" 4716 "<doc>&en;</doc>"; 4717 ExtTest2 test_data 4718 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4719 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4720 * 0x4c = L and 0x20 is a space 4721 */ 4722 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4723 #ifdef XML_UNICODE 4724 const XML_Char *expected = XCS("\x00ff\x00feL "); 4725 #else 4726 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4727 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4728 #endif 4729 CharData storage; 4730 4731 CharData_Init(&storage); 4732 test_data.storage = &storage; 4733 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4734 XML_SetUserData(g_parser, &test_data); 4735 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4736 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4737 == XML_STATUS_ERROR) 4738 xml_failure(g_parser); 4739 CharData_CheckXMLChars(&storage, expected); 4740 } 4741 END_TEST 4742 4743 START_TEST(test_ext_entity_latin1_utf16be_bom2) { 4744 const char *text = "<!DOCTYPE doc [\n" 4745 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4746 "]>\n" 4747 "<doc>&en;</doc>"; 4748 ExtTest2 test_data 4749 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4750 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4751 * 0x4c = L and 0x20 is a space 4752 */ 4753 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4754 #ifdef XML_UNICODE 4755 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4756 #else 4757 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4758 const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; 4759 #endif 4760 CharData storage; 4761 4762 CharData_Init(&storage); 4763 test_data.storage = &storage; 4764 
XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4765 XML_SetUserData(g_parser, &test_data); 4766 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4767 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4768 == XML_STATUS_ERROR) 4769 xml_failure(g_parser); 4770 CharData_CheckXMLChars(&storage, expected); 4771 } 4772 END_TEST 4773 4774 /* Test little-endian UTF-16 given an explicit big-endian encoding */ 4775 START_TEST(test_ext_entity_utf16_be) { 4776 const char *text = "<!DOCTYPE doc [\n" 4777 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4778 "]>\n" 4779 "<doc>&en;</doc>"; 4780 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL}; 4781 #ifdef XML_UNICODE 4782 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4783 #else 4784 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4785 "\xe6\x94\x80" /* U+6500 */ 4786 "\xe2\xbc\x80" /* U+2F00 */ 4787 "\xe3\xb8\x80"); /* U+3E00 */ 4788 #endif 4789 CharData storage; 4790 4791 CharData_Init(&storage); 4792 test_data.storage = &storage; 4793 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4794 XML_SetUserData(g_parser, &test_data); 4795 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4796 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4797 == XML_STATUS_ERROR) 4798 xml_failure(g_parser); 4799 CharData_CheckXMLChars(&storage, expected); 4800 } 4801 END_TEST 4802 4803 /* Test big-endian UTF-16 given an explicit little-endian encoding */ 4804 START_TEST(test_ext_entity_utf16_le) { 4805 const char *text = "<!DOCTYPE doc [\n" 4806 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4807 "]>\n" 4808 "<doc>&en;</doc>"; 4809 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL}; 4810 #ifdef XML_UNICODE 4811 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4812 #else 4813 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4814 
"\xe6\x94\x80" /* U+6500 */ 4815 "\xe2\xbc\x80" /* U+2F00 */ 4816 "\xe3\xb8\x80"); /* U+3E00 */ 4817 #endif 4818 CharData storage; 4819 4820 CharData_Init(&storage); 4821 test_data.storage = &storage; 4822 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4823 XML_SetUserData(g_parser, &test_data); 4824 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4825 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4826 == XML_STATUS_ERROR) 4827 xml_failure(g_parser); 4828 CharData_CheckXMLChars(&storage, expected); 4829 } 4830 END_TEST 4831 4832 /* Test little-endian UTF-16 given no explicit encoding. 4833 * The existing default encoding (UTF-8) is assumed to hold without a 4834 * BOM to contradict it, so the entity value will in fact provoke an 4835 * error because 0x00 is not a valid XML character. We parse the 4836 * whole buffer in one go rather than feeding it in byte by byte to 4837 * exercise different code paths in the initial scanning routines. 
4838 */ 4839 START_TEST(test_ext_entity_utf16_unknown) { 4840 const char *text = "<!DOCTYPE doc [\n" 4841 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4842 "]>\n" 4843 "<doc>&en;</doc>"; 4844 ExtFaults2 test_data 4845 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL, 4846 XML_ERROR_INVALID_TOKEN}; 4847 4848 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2); 4849 XML_SetUserData(g_parser, &test_data); 4850 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 4851 "Invalid character should not have been accepted"); 4852 } 4853 END_TEST 4854 4855 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */ 4856 START_TEST(test_ext_entity_utf8_non_bom) { 4857 const char *text = "<!DOCTYPE doc [\n" 4858 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4859 "]>\n" 4860 "<doc>&en;</doc>"; 4861 ExtTest2 test_data 4862 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */ 4863 3, NULL, NULL}; 4864 #ifdef XML_UNICODE 4865 const XML_Char *expected = XCS("\xfec0"); 4866 #else 4867 const XML_Char *expected = XCS("\xef\xbb\x80"); 4868 #endif 4869 CharData storage; 4870 4871 CharData_Init(&storage); 4872 test_data.storage = &storage; 4873 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4874 XML_SetUserData(g_parser, &test_data); 4875 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4876 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4877 == XML_STATUS_ERROR) 4878 xml_failure(g_parser); 4879 CharData_CheckXMLChars(&storage, expected); 4880 } 4881 END_TEST 4882 4883 /* Test that UTF-8 in a CDATA section is correctly passed through */ 4884 START_TEST(test_utf8_in_cdata_section) { 4885 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>"; 4886 #ifdef XML_UNICODE 4887 const XML_Char *expected = XCS("one \x00e9 two"); 4888 #else 4889 const XML_Char *expected = XCS("one \xc3\xa9 two"); 4890 #endif 4891 4892 run_character_check(text, expected); 4893 } 4894 
END_TEST 4895 4896 /* Test that little-endian UTF-16 in a CDATA section is handled */ 4897 START_TEST(test_utf8_in_cdata_section_2) { 4898 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>"; 4899 #ifdef XML_UNICODE 4900 const XML_Char *expected = XCS("\x00e9]\x00e9two"); 4901 #else 4902 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two"); 4903 #endif 4904 4905 run_character_check(text, expected); 4906 } 4907 END_TEST 4908 4909 START_TEST(test_utf8_in_start_tags) { 4910 struct test_case { 4911 bool goodName; 4912 bool goodNameStart; 4913 const char *tagName; 4914 }; 4915 4916 // The idea with the tests below is this: 4917 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences 4918 // go to isNever and are hence not a concern. 4919 // 4920 // We start with a character that is a valid name character 4921 // (or even name-start character, see XML 1.0r4 spec) and then we flip 4922 // single bits at places where (1) the result leaves the UTF-8 encoding space 4923 // and (2) we stay in the same n-byte sequence family. 4924 // 4925 // The flipped bits are highlighted in angle brackets in comments, 4926 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped 4927 // the most significant bit to 1 to leave UTF-8 encoding space. 
4928 struct test_case cases[] = { 4929 // 1-byte UTF-8: [0xxx xxxx] 4930 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' 4931 {false, false, "\xBA"}, // [<1>011 1010] 4932 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' 4933 {false, false, "\xB9"}, // [<1>011 1001] 4934 4935 // 2-byte UTF-8: [110x xxxx] [10xx xxxx] 4936 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = 4937 // Arabic small waw U+06E5 4938 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] 4939 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] 4940 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] 4941 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = 4942 // combining char U+0301 4943 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] 4944 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] 4945 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] 4946 4947 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] 4948 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = 4949 // Devanagari Letter A U+0905 4950 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] 4951 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] 4952 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] 4953 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] 4954 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] 4955 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = 4956 // combining char U+0901 4957 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] 4958 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] 4959 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] 4960 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] 4961 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] 4962 }; 4963 const bool atNameStart[] = {true, 
false}; 4964 4965 size_t i = 0; 4966 char doc[1024]; 4967 size_t failCount = 0; 4968 4969 // we need all the bytes to be parsed, but we don't want the errors that can 4970 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. 4971 if (g_reparseDeferralEnabledDefault) { 4972 return; 4973 } 4974 4975 for (; i < sizeof(cases) / sizeof(cases[0]); i++) { 4976 size_t j = 0; 4977 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { 4978 const bool expectedSuccess 4979 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; 4980 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a", 4981 cases[i].tagName); 4982 XML_Parser parser = XML_ParserCreate(NULL); 4983 4984 const enum XML_Status status = _XML_Parse_SINGLE_BYTES( 4985 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); 4986 4987 bool success = true; 4988 if ((status == XML_STATUS_OK) != expectedSuccess) { 4989 success = false; 4990 } 4991 if ((status == XML_STATUS_ERROR) 4992 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { 4993 success = false; 4994 } 4995 4996 if (! success) { 4997 fprintf( 4998 stderr, 4999 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", 5000 (unsigned)i + 1u, atNameStart[j] ? 
" " : "not ", 5001 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); 5002 failCount++; 5003 } 5004 5005 XML_ParserFree(parser); 5006 } 5007 } 5008 5009 if (failCount > 0) { 5010 fail("UTF-8 regression detected"); 5011 } 5012 } 5013 END_TEST 5014 5015 /* Test trailing spaces in elements are accepted */ 5016 START_TEST(test_trailing_spaces_in_elements) { 5017 const char *text = "<doc >Hi</doc >"; 5018 const XML_Char *expected = XCS("doc/doc"); 5019 CharData storage; 5020 5021 CharData_Init(&storage); 5022 XML_SetElementHandler(g_parser, record_element_start_handler, 5023 record_element_end_handler); 5024 XML_SetUserData(g_parser, &storage); 5025 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5026 == XML_STATUS_ERROR) 5027 xml_failure(g_parser); 5028 CharData_CheckXMLChars(&storage, expected); 5029 } 5030 END_TEST 5031 5032 START_TEST(test_utf16_attribute) { 5033 const char text[] = 5034 /* <d {KHO KHWAI}{CHO CHAN}='a'/> 5035 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5036 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5037 */ 5038 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0"; 5039 const XML_Char *expected = XCS("a"); 5040 CharData storage; 5041 5042 CharData_Init(&storage); 5043 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5044 XML_SetUserData(g_parser, &storage); 5045 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5046 == XML_STATUS_ERROR) 5047 xml_failure(g_parser); 5048 CharData_CheckXMLChars(&storage, expected); 5049 } 5050 END_TEST 5051 5052 START_TEST(test_utf16_second_attr) { 5053 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/> 5054 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5055 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5056 */ 5057 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0" 5058 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0"; 5059 const XML_Char *expected = XCS("1"); 5060 CharData storage; 5061 5062 CharData_Init(&storage); 5063 
XML_SetStartElementHandler(g_parser, accumulate_attribute); 5064 XML_SetUserData(g_parser, &storage); 5065 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5066 == XML_STATUS_ERROR) 5067 xml_failure(g_parser); 5068 CharData_CheckXMLChars(&storage, expected); 5069 } 5070 END_TEST 5071 5072 START_TEST(test_attr_after_solidus) { 5073 const char *text = "<doc attr1='a' / attr2='b'>"; 5074 5075 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted"); 5076 } 5077 END_TEST 5078 5079 START_TEST(test_utf16_pe) { 5080 /* <!DOCTYPE doc [ 5081 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'> 5082 * %{KHO KHWAI}{CHO CHAN}; 5083 * ]> 5084 * <doc></doc> 5085 * 5086 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5087 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5088 */ 5089 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 5090 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 " 5091 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 " 5092 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n" 5093 "\0%\x0e\x04\x0e\x08\0;\0\n" 5094 "\0]\0>\0\n" 5095 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>"; 5096 #ifdef XML_UNICODE 5097 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n"); 5098 #else 5099 const XML_Char *expected 5100 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n"); 5101 #endif 5102 CharData storage; 5103 5104 CharData_Init(&storage); 5105 XML_SetUserData(g_parser, &storage); 5106 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl); 5107 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5108 == XML_STATUS_ERROR) 5109 xml_failure(g_parser); 5110 CharData_CheckXMLChars(&storage, expected); 5111 } 5112 END_TEST 5113 5114 /* Test that duff attribute description keywords are rejected */ 5115 START_TEST(test_bad_attr_desc_keyword) { 5116 const char *text = "<!DOCTYPE doc [\n" 5117 " <!ATTLIST doc attr CDATA #!IMPLIED>\n" 5118 "]>\n" 5119 "<doc 
/>"; 5120 5121 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5122 "Bad keyword !IMPLIED not faulted"); 5123 } 5124 END_TEST 5125 5126 /* Test that an invalid attribute description keyword consisting of 5127 * UTF-16 characters with their top bytes non-zero are correctly 5128 * faulted 5129 */ 5130 START_TEST(test_bad_attr_desc_keyword_utf16) { 5131 /* <!DOCTYPE d [ 5132 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}> 5133 * ]><d/> 5134 * 5135 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5136 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5137 */ 5138 const char text[] 5139 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 5140 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 " 5141 "\0#\x0e\x04\x0e\x08\0>\0\n" 5142 "\0]\0>\0<\0d\0/\0>"; 5143 5144 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5145 != XML_STATUS_ERROR) 5146 fail("Invalid UTF16 attribute keyword not faulted"); 5147 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 5148 xml_failure(g_parser); 5149 } 5150 END_TEST 5151 5152 /* Test that invalid syntax in a <!DOCTYPE> is rejected. 
Do this 5153 * using prefix-encoding (see above) to trigger specific code paths 5154 */ 5155 START_TEST(test_bad_doctype) { 5156 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 5157 "<!DOCTYPE doc [ \x80\x44 ]><doc/>"; 5158 5159 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 5160 expect_failure(text, XML_ERROR_SYNTAX, 5161 "Invalid bytes in DOCTYPE not faulted"); 5162 } 5163 END_TEST 5164 5165 START_TEST(test_bad_doctype_utf8) { 5166 const char *text = "<!DOCTYPE \xDB\x25" 5167 "doc><doc/>"; // [1101 1011] [<0>010 0101] 5168 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5169 "Invalid UTF-8 in DOCTYPE not faulted"); 5170 } 5171 END_TEST 5172 5173 START_TEST(test_bad_doctype_utf16) { 5174 const char text[] = 5175 /* <!DOCTYPE doc [ \x06f2 ]><doc/> 5176 * 5177 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number 5178 * (name character) but not a valid letter (name start character) 5179 */ 5180 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 " 5181 "\x06\xf2" 5182 "\0 \0]\0>\0<\0d\0o\0c\0/\0>"; 5183 5184 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5185 != XML_STATUS_ERROR) 5186 fail("Invalid bytes in DOCTYPE not faulted"); 5187 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 5188 xml_failure(g_parser); 5189 } 5190 END_TEST 5191 5192 START_TEST(test_bad_doctype_plus) { 5193 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n" 5194 "<1+>&foo;</1+>"; 5195 5196 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5197 "'+' in document name not faulted"); 5198 } 5199 END_TEST 5200 5201 START_TEST(test_bad_doctype_star) { 5202 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n" 5203 "<1*>&foo;</1*>"; 5204 5205 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5206 "'*' in document name not faulted"); 5207 } 5208 END_TEST 5209 5210 START_TEST(test_bad_doctype_query) { 5211 const char *text = "<!DOCTYPE 1? 
[ <!ENTITY foo 'bar'> ]>\n" 5212 "<1?>&foo;</1?>"; 5213 5214 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5215 "'?' in document name not faulted"); 5216 } 5217 END_TEST 5218 5219 START_TEST(test_unknown_encoding_bad_ignore) { 5220 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>" 5221 "<!DOCTYPE doc SYSTEM 'foo'>" 5222 "<doc><e>&entity;</e></doc>"; 5223 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>", 5224 "Invalid character not faulted", XCS("prefix-conv"), 5225 XML_ERROR_INVALID_TOKEN}; 5226 5227 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 5228 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5229 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 5230 XML_SetUserData(g_parser, &fault); 5231 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 5232 "Bad IGNORE section with unknown encoding not failed"); 5233 } 5234 END_TEST 5235 5236 START_TEST(test_entity_in_utf16_be_attr) { 5237 const char text[] = 5238 /* <e a='ä ä'></e> */ 5239 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 " 5240 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>"; 5241 #ifdef XML_UNICODE 5242 const XML_Char *expected = XCS("\x00e4 \x00e4"); 5243 #else 5244 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5245 #endif 5246 CharData storage; 5247 5248 CharData_Init(&storage); 5249 XML_SetUserData(g_parser, &storage); 5250 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5251 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5252 == XML_STATUS_ERROR) 5253 xml_failure(g_parser); 5254 CharData_CheckXMLChars(&storage, expected); 5255 } 5256 END_TEST 5257 5258 START_TEST(test_entity_in_utf16_le_attr) { 5259 const char text[] = 5260 /* <e a='ä ä'></e> */ 5261 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0" 5262 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0"; 5263 #ifdef XML_UNICODE 5264 const XML_Char *expected = XCS("\x00e4 \x00e4"); 5265 #else 
5266 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5267 #endif 5268 CharData storage; 5269 5270 CharData_Init(&storage); 5271 XML_SetUserData(g_parser, &storage); 5272 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5273 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5274 == XML_STATUS_ERROR) 5275 xml_failure(g_parser); 5276 CharData_CheckXMLChars(&storage, expected); 5277 } 5278 END_TEST 5279 5280 START_TEST(test_entity_public_utf16_be) { 5281 const char text[] = 5282 /* <!DOCTYPE d [ */ 5283 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 5284 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5285 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 " 5286 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n" 5287 /* %e; */ 5288 "\0%\0e\0;\0\n" 5289 /* ]> */ 5290 "\0]\0>\0\n" 5291 /* <d>&j;</d> */ 5292 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>"; 5293 ExtTest2 test_data 5294 = {/* <!ENTITY j 'baz'> */ 5295 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL}; 5296 const XML_Char *expected = XCS("baz"); 5297 CharData storage; 5298 5299 CharData_Init(&storage); 5300 test_data.storage = &storage; 5301 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5302 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5303 XML_SetUserData(g_parser, &test_data); 5304 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5305 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5306 == XML_STATUS_ERROR) 5307 xml_failure(g_parser); 5308 CharData_CheckXMLChars(&storage, expected); 5309 } 5310 END_TEST 5311 5312 START_TEST(test_entity_public_utf16_le) { 5313 const char text[] = 5314 /* <!DOCTYPE d [ */ 5315 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0" 5316 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5317 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0" 5318 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0" 5319 /* %e; */ 5320 
"%\0e\0;\0\n\0" 5321 /* ]> */ 5322 "]\0>\0\n\0" 5323 /* <d>&j;</d> */ 5324 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0"; 5325 ExtTest2 test_data 5326 = {/* <!ENTITY j 'baz'> */ 5327 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL}; 5328 const XML_Char *expected = XCS("baz"); 5329 CharData storage; 5330 5331 CharData_Init(&storage); 5332 test_data.storage = &storage; 5333 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5334 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5335 XML_SetUserData(g_parser, &test_data); 5336 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5337 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5338 == XML_STATUS_ERROR) 5339 xml_failure(g_parser); 5340 CharData_CheckXMLChars(&storage, expected); 5341 } 5342 END_TEST 5343 5344 /* Test that a doctype with neither an internal nor external subset is 5345 * faulted 5346 */ 5347 START_TEST(test_short_doctype) { 5348 const char *text = "<!DOCTYPE doc></doc>"; 5349 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5350 "DOCTYPE without subset not rejected"); 5351 } 5352 END_TEST 5353 5354 START_TEST(test_short_doctype_2) { 5355 const char *text = "<!DOCTYPE doc PUBLIC></doc>"; 5356 expect_failure(text, XML_ERROR_SYNTAX, 5357 "DOCTYPE without Public ID not rejected"); 5358 } 5359 END_TEST 5360 5361 START_TEST(test_short_doctype_3) { 5362 const char *text = "<!DOCTYPE doc SYSTEM></doc>"; 5363 expect_failure(text, XML_ERROR_SYNTAX, 5364 "DOCTYPE without System ID not rejected"); 5365 } 5366 END_TEST 5367 5368 START_TEST(test_long_doctype) { 5369 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>"; 5370 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected"); 5371 } 5372 END_TEST 5373 5374 START_TEST(test_bad_entity) { 5375 const char *text = "<!DOCTYPE doc [\n" 5376 " <!ENTITY foo PUBLIC>\n" 5377 "]>\n" 5378 "<doc/>"; 5379 expect_failure(text, XML_ERROR_SYNTAX, 5380 "ENTITY 
without Public ID is not rejected"); 5381 } 5382 END_TEST 5383 5384 /* Test unquoted value is faulted */ 5385 START_TEST(test_bad_entity_2) { 5386 const char *text = "<!DOCTYPE doc [\n" 5387 " <!ENTITY % foo bar>\n" 5388 "]>\n" 5389 "<doc/>"; 5390 expect_failure(text, XML_ERROR_SYNTAX, 5391 "ENTITY without Public ID is not rejected"); 5392 } 5393 END_TEST 5394 5395 START_TEST(test_bad_entity_3) { 5396 const char *text = "<!DOCTYPE doc [\n" 5397 " <!ENTITY % foo PUBLIC>\n" 5398 "]>\n" 5399 "<doc/>"; 5400 expect_failure(text, XML_ERROR_SYNTAX, 5401 "Parameter ENTITY without Public ID is not rejected"); 5402 } 5403 END_TEST 5404 5405 START_TEST(test_bad_entity_4) { 5406 const char *text = "<!DOCTYPE doc [\n" 5407 " <!ENTITY % foo SYSTEM>\n" 5408 "]>\n" 5409 "<doc/>"; 5410 expect_failure(text, XML_ERROR_SYNTAX, 5411 "Parameter ENTITY without Public ID is not rejected"); 5412 } 5413 END_TEST 5414 5415 START_TEST(test_bad_notation) { 5416 const char *text = "<!DOCTYPE doc [\n" 5417 " <!NOTATION n SYSTEM>\n" 5418 "]>\n" 5419 "<doc/>"; 5420 expect_failure(text, XML_ERROR_SYNTAX, 5421 "Notation without System ID is not rejected"); 5422 } 5423 END_TEST 5424 5425 /* Test for issue #11, wrongly suppressed default handler */ 5426 START_TEST(test_default_doctype_handler) { 5427 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n" 5428 " <!ENTITY foo 'bar'>\n" 5429 "]>\n" 5430 "<doc>&foo;</doc>"; 5431 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE}, 5432 {XCS("'test.dtd'"), 10, XML_FALSE}, 5433 {NULL, 0, XML_FALSE}}; 5434 int i; 5435 5436 XML_SetUserData(g_parser, &test_data); 5437 XML_SetDefaultHandler(g_parser, checking_default_handler); 5438 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 5439 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5440 == XML_STATUS_ERROR) 5441 xml_failure(g_parser); 5442 for (i = 0; test_data[i].expected != NULL; i++) 5443 if (! 
test_data[i].seen) 5444 fail("Default handler not run for public !DOCTYPE"); 5445 } 5446 END_TEST 5447 5448 START_TEST(test_empty_element_abort) { 5449 const char *text = "<abort/>"; 5450 5451 XML_SetStartElementHandler(g_parser, start_element_suspender); 5452 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5453 != XML_STATUS_ERROR) 5454 fail("Expected to error on abort"); 5455 } 5456 END_TEST 5457 5458 /* Regression test for GH issue #612: unfinished m_declAttributeType 5459 * allocation in ->m_tempPool can corrupt following allocation. 5460 */ 5461 START_TEST(test_pool_integrity_with_unfinished_attr) { 5462 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n" 5463 "<!DOCTYPE foo [\n" 5464 "<!ELEMENT foo ANY>\n" 5465 "<!ENTITY % entp SYSTEM \"external.dtd\">\n" 5466 "%entp;\n" 5467 "]>\n" 5468 "<a></a>\n"; 5469 const XML_Char *expected = XCS("COMMENT"); 5470 CharData storage; 5471 5472 CharData_Init(&storage); 5473 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5474 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist); 5475 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 5476 XML_SetCommentHandler(g_parser, accumulate_comment); 5477 XML_SetUserData(g_parser, &storage); 5478 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5479 == XML_STATUS_ERROR) 5480 xml_failure(g_parser); 5481 CharData_CheckXMLChars(&storage, expected); 5482 } 5483 END_TEST 5484 5485 /* Test a possible early return location in internalEntityProcessor */ 5486 START_TEST(test_entity_ref_no_elements) { 5487 const char *const text = "<!DOCTYPE foo [\n" 5488 "<!ENTITY e1 \"test\">\n" 5489 "]> <foo>&e1;"; // intentionally missing newline 5490 5491 XML_Parser parser = XML_ParserCreate(NULL); 5492 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5493 == XML_STATUS_ERROR); 5494 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS); 5495 
XML_ParserFree(parser); 5496 } 5497 END_TEST 5498 5499 /* Tests if chained entity references lead to unbounded recursion */ 5500 START_TEST(test_deep_nested_entity) { 5501 const size_t N_LINES = 60000; 5502 const size_t SIZE_PER_LINE = 50; 5503 5504 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); 5505 if (text == NULL) { 5506 fail("malloc failed"); 5507 } 5508 5509 char *textPtr = text; 5510 5511 // Create the XML 5512 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5513 "<!DOCTYPE foo [\n" 5514 " <!ENTITY s0 'deepText'>\n"); 5515 5516 for (size_t i = 1; i < N_LINES; ++i) { 5517 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n", 5518 (long unsigned)i, (long unsigned)(i - 1)); 5519 } 5520 5521 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n", 5522 (long unsigned)(N_LINES - 1)); 5523 5524 const XML_Char *const expected = XCS("deepText"); 5525 5526 CharData storage; 5527 CharData_Init(&storage); 5528 5529 XML_Parser parser = XML_ParserCreate(NULL); 5530 5531 XML_SetCharacterDataHandler(parser, accumulate_characters); 5532 XML_SetUserData(parser, &storage); 5533 5534 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5535 == XML_STATUS_ERROR) 5536 xml_failure(parser); 5537 5538 CharData_CheckXMLChars(&storage, expected); 5539 XML_ParserFree(parser); 5540 free(text); 5541 } 5542 END_TEST 5543 5544 /* Tests if chained entity references in attributes 5545 lead to unbounded recursion */ 5546 START_TEST(test_deep_nested_attribute_entity) { 5547 const size_t N_LINES = 60000; 5548 const size_t SIZE_PER_LINE = 100; 5549 5550 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); 5551 if (text == NULL) { 5552 fail("malloc failed"); 5553 } 5554 5555 char *textPtr = text; 5556 5557 // Create the XML 5558 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5559 "<!DOCTYPE foo [\n" 5560 " <!ENTITY s0 'deepText'>\n"); 5561 5562 for (size_t i = 1; i < N_LINES; ++i) { 5563 textPtr += snprintf(textPtr, SIZE_PER_LINE, 
" <!ENTITY s%lu '&s%lu;'>\n", 5564 (long unsigned)i, (long unsigned)(i - 1)); 5565 } 5566 5567 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n", 5568 (long unsigned)(N_LINES - 1)); 5569 5570 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}}; 5571 ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}}; 5572 info[0].attributes = doc_info; 5573 5574 XML_Parser parser = XML_ParserCreate(NULL); 5575 ParserAndElementInfo parserPlusElemenInfo = {parser, info}; 5576 5577 XML_SetStartElementHandler(parser, counting_start_element_handler); 5578 XML_SetUserData(parser, &parserPlusElemenInfo); 5579 5580 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5581 == XML_STATUS_ERROR) 5582 xml_failure(parser); 5583 5584 XML_ParserFree(parser); 5585 free(text); 5586 } 5587 END_TEST 5588 5589 START_TEST(test_deep_nested_entity_delayed_interpretation) { 5590 const size_t N_LINES = 70000; 5591 const size_t SIZE_PER_LINE = 100; 5592 5593 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); 5594 if (text == NULL) { 5595 fail("malloc failed"); 5596 } 5597 5598 char *textPtr = text; 5599 5600 // Create the XML 5601 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5602 "<!DOCTYPE foo [\n" 5603 " <!ENTITY %% s0 'deepText'>\n"); 5604 5605 for (size_t i = 1; i < N_LINES; ++i) { 5606 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5607 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i, 5608 (long unsigned)(i - 1)); 5609 } 5610 5611 snprintf(textPtr, SIZE_PER_LINE, 5612 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n" 5613 " %%define_g;\n" 5614 "]>\n" 5615 "<foo/>\n", 5616 (long unsigned)(N_LINES - 1)); 5617 5618 XML_Parser parser = XML_ParserCreate(NULL); 5619 5620 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5621 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5622 == XML_STATUS_ERROR) 5623 xml_failure(parser); 5624 5625 XML_ParserFree(parser); 5626 free(text); 
5627 } 5628 END_TEST 5629 5630 START_TEST(test_nested_entity_suspend) { 5631 const char *const text = "<!DOCTYPE a [\n" 5632 " <!ENTITY e1 '<!--e1-->'>\n" 5633 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n" 5634 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n" 5635 "]>\n" 5636 "<a><!--start-->&e3;<!--end--></a>"; 5637 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head") 5638 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end"); 5639 CharData storage; 5640 CharData_Init(&storage); 5641 XML_Parser parser = XML_ParserCreate(NULL); 5642 ParserPlusStorage parserPlusStorage = {parser, &storage}; 5643 5644 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5645 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler); 5646 XML_SetUserData(parser, &parserPlusStorage); 5647 5648 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5649 while (status == XML_STATUS_SUSPENDED) { 5650 status = XML_ResumeParser(parser); 5651 } 5652 if (status != XML_STATUS_OK) 5653 xml_failure(parser); 5654 5655 CharData_CheckXMLChars(&storage, expected); 5656 XML_ParserFree(parser); 5657 } 5658 END_TEST 5659 5660 START_TEST(test_nested_entity_suspend_2) { 5661 const char *const text = "<!DOCTYPE doc [\n" 5662 " <!ENTITY ge1 'head1Ztail1'>\n" 5663 " <!ENTITY ge2 'head2&ge1;tail2'>\n" 5664 " <!ENTITY ge3 'head3&ge2;tail3'>\n" 5665 "]>\n" 5666 "<doc>&ge3;</doc>"; 5667 const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1") 5668 XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3"); 5669 CharData storage; 5670 CharData_Init(&storage); 5671 XML_Parser parser = XML_ParserCreate(NULL); 5672 ParserPlusStorage parserPlusStorage = {parser, &storage}; 5673 5674 XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend); 5675 XML_SetUserData(parser, &parserPlusStorage); 5676 5677 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5678 while (status == XML_STATUS_SUSPENDED) 
{ 5679 status = XML_ResumeParser(parser); 5680 } 5681 if (status != XML_STATUS_OK) 5682 xml_failure(parser); 5683 5684 CharData_CheckXMLChars(&storage, expected); 5685 XML_ParserFree(parser); 5686 } 5687 END_TEST 5688 5689 /* Regression test for quadratic parsing on large tokens */ 5690 START_TEST(test_big_tokens_scale_linearly) { 5691 const struct { 5692 const char *pre; 5693 const char *post; 5694 } text[] = { 5695 {"<a>", "</a>"}, // assumed good, used as baseline 5696 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch 5697 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²) 5698 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²) 5699 {"<e><", "/></e>"}, // big elem name, used to be O(N²) 5700 }; 5701 const int num_cases = sizeof(text) / sizeof(text[0]); 5702 char aaaaaa[4096]; 5703 const int fillsize = (int)sizeof(aaaaaa); 5704 const int fillcount = 100; 5705 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post. 5706 const unsigned max_factor = 4; 5707 const unsigned max_scanned = max_factor * approx_bytes; 5708 5709 memset(aaaaaa, 'a', fillsize); 5710 5711 if (! g_reparseDeferralEnabledDefault) { 5712 return; // heuristic is disabled; we would get O(n^2) and fail. 
5713 } 5714 5715 for (int i = 0; i < num_cases; ++i) { 5716 XML_Parser parser = XML_ParserCreate(NULL); 5717 assert_true(parser != NULL); 5718 enum XML_Status status; 5719 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post); 5720 5721 // parse the start text 5722 g_bytesScanned = 0; 5723 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, 5724 (int)strlen(text[i].pre), XML_FALSE); 5725 if (status != XML_STATUS_OK) { 5726 xml_failure(parser); 5727 } 5728 5729 // parse lots of 'a', failing the test early if it takes too long 5730 unsigned past_max_count = 0; 5731 for (int f = 0; f < fillcount; ++f) { 5732 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); 5733 if (status != XML_STATUS_OK) { 5734 xml_failure(parser); 5735 } 5736 if (g_bytesScanned > max_scanned) { 5737 // We're not done, and have already passed the limit -- the test will 5738 // definitely fail. This block allows us to save time by failing early. 5739 const unsigned pushed 5740 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize; 5741 fprintf( 5742 stderr, 5743 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 5744 f + 1, fillcount, pushed, g_bytesScanned, 5745 g_bytesScanned / (double)pushed, max_scanned, max_factor); 5746 past_max_count++; 5747 // We are failing, but allow a few log prints first. If we don't reach 5748 // a count of five, the test will fail after the loop instead. 
5749 assert_true(past_max_count < 5); 5750 } 5751 } 5752 5753 // parse the end text 5754 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, 5755 (int)strlen(text[i].post), XML_TRUE); 5756 if (status != XML_STATUS_OK) { 5757 xml_failure(parser); 5758 } 5759 5760 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working 5761 if (g_bytesScanned > max_scanned) { 5762 fprintf( 5763 stderr, 5764 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 5765 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned, 5766 max_factor); 5767 fail("scanned too many bytes"); 5768 } 5769 5770 XML_ParserFree(parser); 5771 } 5772 } 5773 END_TEST 5774 5775 START_TEST(test_set_reparse_deferral) { 5776 const char *const pre = "<d>"; 5777 const char *const start = "<x attr='"; 5778 const char *const end = "'></x>"; 5779 char eeeeee[100]; 5780 const int fillsize = (int)sizeof(eeeeee); 5781 memset(eeeeee, 'e', fillsize); 5782 5783 for (int enabled = 0; enabled <= 1; enabled += 1) { 5784 set_subtest("deferral=%d", enabled); 5785 5786 XML_Parser parser = XML_ParserCreate(NULL); 5787 assert_true(parser != NULL); 5788 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5789 // pre-grow the buffer to avoid reparsing due to almost-fullness 5790 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5791 5792 CharData storage; 5793 CharData_Init(&storage); 5794 XML_SetUserData(parser, &storage); 5795 XML_SetStartElementHandler(parser, start_element_event_handler); 5796 5797 enum XML_Status status; 5798 // parse the start text 5799 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5800 if (status != XML_STATUS_OK) { 5801 xml_failure(parser); 5802 } 5803 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5804 5805 // ..and the start of the token 5806 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5807 if (status != XML_STATUS_OK) { 5808 xml_failure(parser); 5809 } 5810 
CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one 5811 5812 // try to parse lots of 'e', but the token isn't finished 5813 for (int c = 0; c < 100; ++c) { 5814 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5815 if (status != XML_STATUS_OK) { 5816 xml_failure(parser); 5817 } 5818 } 5819 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 5820 5821 // end the <x> token. 5822 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5823 if (status != XML_STATUS_OK) { 5824 xml_failure(parser); 5825 } 5826 5827 if (enabled) { 5828 // In general, we may need to push more data to trigger a reparse attempt, 5829 // but in this test, the data is constructed to always require it. 5830 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect 5831 // 2x the token length should suffice; the +1 covers the start and end. 5832 for (int c = 0; c < 101; ++c) { 5833 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5834 if (status != XML_STATUS_OK) { 5835 xml_failure(parser); 5836 } 5837 } 5838 } 5839 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done 5840 5841 XML_ParserFree(parser); 5842 } 5843 } 5844 END_TEST 5845 5846 struct element_decl_data { 5847 XML_Parser parser; 5848 int count; 5849 }; 5850 5851 static void 5852 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { 5853 UNUSED_P(name); 5854 struct element_decl_data *testdata = (struct element_decl_data *)userData; 5855 testdata->count += 1; 5856 XML_FreeContentModel(testdata->parser, model); 5857 } 5858 5859 static int 5860 external_inherited_parser(XML_Parser p, const XML_Char *context, 5861 const XML_Char *base, const XML_Char *systemId, 5862 const XML_Char *publicId) { 5863 UNUSED_P(base); 5864 UNUSED_P(systemId); 5865 UNUSED_P(publicId); 5866 const char *const pre = "<!ELEMENT document ANY>\n"; 5867 const char *const start = "<!ELEMENT "; 5868 const char *const end = " ANY>\n"; 5869 const char 
*const post = "<!ELEMENT xyz ANY>\n"; 5870 const int enabled = *(int *)XML_GetUserData(p); 5871 char eeeeee[100]; 5872 char spaces[100]; 5873 const int fillsize = (int)sizeof(eeeeee); 5874 assert_true(fillsize == (int)sizeof(spaces)); 5875 memset(eeeeee, 'e', fillsize); 5876 memset(spaces, ' ', fillsize); 5877 5878 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); 5879 assert_true(parser != NULL); 5880 // pre-grow the buffer to avoid reparsing due to almost-fullness 5881 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5882 5883 struct element_decl_data testdata; 5884 testdata.parser = parser; 5885 testdata.count = 0; 5886 XML_SetUserData(parser, &testdata); 5887 XML_SetElementDeclHandler(parser, element_decl_counter); 5888 5889 enum XML_Status status; 5890 // parse the initial text 5891 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5892 if (status != XML_STATUS_OK) { 5893 xml_failure(parser); 5894 } 5895 assert_true(testdata.count == 1); // first element should be done 5896 5897 // ..and the start of the big token 5898 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5899 if (status != XML_STATUS_OK) { 5900 xml_failure(parser); 5901 } 5902 assert_true(testdata.count == 1); // still just the first one 5903 5904 // try to parse lots of 'e', but the token isn't finished 5905 for (int c = 0; c < 100; ++c) { 5906 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5907 if (status != XML_STATUS_OK) { 5908 xml_failure(parser); 5909 } 5910 } 5911 assert_true(testdata.count == 1); // *still* just the first one 5912 5913 // end the big token. 5914 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5915 if (status != XML_STATUS_OK) { 5916 xml_failure(parser); 5917 } 5918 5919 if (enabled) { 5920 // In general, we may need to push more data to trigger a reparse attempt, 5921 // but in this test, the data is constructed to always require it. 
5922 assert_true(testdata.count == 1); // or the test is incorrect 5923 // 2x the token length should suffice; the +1 covers the start and end. 5924 for (int c = 0; c < 101; ++c) { 5925 status = XML_Parse(parser, spaces, fillsize, XML_FALSE); 5926 if (status != XML_STATUS_OK) { 5927 xml_failure(parser); 5928 } 5929 } 5930 } 5931 assert_true(testdata.count == 2); // the big token should be done 5932 5933 // parse the final text 5934 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); 5935 if (status != XML_STATUS_OK) { 5936 xml_failure(parser); 5937 } 5938 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done 5939 5940 XML_ParserFree(parser); 5941 return XML_STATUS_OK; 5942 } 5943 5944 START_TEST(test_reparse_deferral_is_inherited) { 5945 const char *const text 5946 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>"; 5947 for (int enabled = 0; enabled <= 1; ++enabled) { 5948 set_subtest("deferral=%d", enabled); 5949 5950 XML_Parser parser = XML_ParserCreate(NULL); 5951 assert_true(parser != NULL); 5952 XML_SetUserData(parser, (void *)&enabled); 5953 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5954 // this handler creates a sub-parser and checks that its deferral behavior 5955 // is what we expected, based on the value of `enabled` (in userdata). 
5956 XML_SetExternalEntityRefHandler(parser, external_inherited_parser); 5957 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5958 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 5959 xml_failure(parser); 5960 5961 XML_ParserFree(parser); 5962 } 5963 } 5964 END_TEST 5965 5966 START_TEST(test_set_reparse_deferral_on_null_parser) { 5967 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); 5968 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); 5969 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); 5970 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); 5971 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) 5972 == XML_FALSE); 5973 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) 5974 == XML_FALSE); 5975 } 5976 END_TEST 5977 5978 START_TEST(test_set_reparse_deferral_on_the_fly) { 5979 const char *const pre = "<d><x attr='"; 5980 const char *const end = "'></x>"; 5981 char iiiiii[100]; 5982 const int fillsize = (int)sizeof(iiiiii); 5983 memset(iiiiii, 'i', fillsize); 5984 5985 XML_Parser parser = XML_ParserCreate(NULL); 5986 assert_true(parser != NULL); 5987 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); 5988 5989 CharData storage; 5990 CharData_Init(&storage); 5991 XML_SetUserData(parser, &storage); 5992 XML_SetStartElementHandler(parser, start_element_event_handler); 5993 5994 enum XML_Status status; 5995 // parse the start text 5996 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5997 if (status != XML_STATUS_OK) { 5998 xml_failure(parser); 5999 } 6000 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 6001 6002 // try to parse some 'i', but the token isn't finished 6003 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); 6004 if (status != XML_STATUS_OK) { 6005 xml_failure(parser); 6006 } 6007 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 6008 
6009 // end the <x> token. 6010 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 6011 if (status != XML_STATUS_OK) { 6012 xml_failure(parser); 6013 } 6014 CharData_CheckXMLChars(&storage, XCS("d")); // not yet. 6015 6016 // now change the heuristic setting and add *no* data 6017 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); 6018 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. 6019 status = XML_Parse(parser, "", 0, XML_FALSE); 6020 if (status != XML_STATUS_OK) { 6021 xml_failure(parser); 6022 } 6023 CharData_CheckXMLChars(&storage, XCS("dx")); 6024 6025 XML_ParserFree(parser); 6026 } 6027 END_TEST 6028 6029 START_TEST(test_set_bad_reparse_option) { 6030 XML_Parser parser = XML_ParserCreate(NULL); 6031 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); 6032 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); 6033 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); 6034 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); 6035 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); 6036 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); 6037 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); 6038 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); 6039 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); 6040 XML_ParserFree(parser); 6041 } 6042 END_TEST 6043 6044 static size_t g_totalAlloc = 0; 6045 static size_t g_biggestAlloc = 0; 6046 6047 static void * 6048 counting_realloc(void *ptr, size_t size) { 6049 g_totalAlloc += size; 6050 if (size > g_biggestAlloc) { 6051 g_biggestAlloc = size; 6052 } 6053 return realloc(ptr, size); 6054 } 6055 6056 static void * 6057 counting_malloc(size_t size) { 6058 return counting_realloc(NULL, size); 6059 } 6060 6061 START_TEST(test_bypass_heuristic_when_close_to_bufsize) { 6062 if (g_chunkSize != 0) { 6063 // this test does not 
use SINGLE_BYTES, because it depends on very precise 6064 // buffer fills. 6065 return; 6066 } 6067 if (! g_reparseDeferralEnabledDefault) { 6068 return; // this test is irrelevant when the deferral heuristic is disabled. 6069 } 6070 6071 const int document_length = 65536; 6072 char *const document = (char *)malloc(document_length); 6073 assert_true(document != NULL); 6074 6075 const XML_Memory_Handling_Suite memfuncs = { 6076 counting_malloc, 6077 counting_realloc, 6078 free, 6079 }; 6080 6081 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; 6082 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; 6083 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; 6084 6085 for (const int *leading = leading_list; *leading >= 0; leading++) { 6086 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { 6087 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { 6088 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken, 6089 *fillsize); 6090 // start by checking that the test looks reasonably valid 6091 assert_true(*leading + *bigtoken <= document_length); 6092 6093 // put 'x' everywhere; some will be overwritten by elements. 6094 memset(document, 'x', document_length); 6095 // maybe add an initial tag 6096 if (*leading) { 6097 assert_true(*leading >= 3); // or the test case is invalid 6098 memcpy(document, "<a>", 3); 6099 } 6100 // add the large token 6101 document[*leading + 0] = '<'; 6102 document[*leading + 1] = 'b'; 6103 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token 6104 document[*leading + *bigtoken - 1] = '>'; 6105 6106 // 1 for 'b', plus 1 or 0 depending on the presence of 'a' 6107 const int expected_elem_total = 1 + (*leading ? 
1 : 0); 6108 6109 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); 6110 assert_true(parser != NULL); 6111 6112 CharData storage; 6113 CharData_Init(&storage); 6114 XML_SetUserData(parser, &storage); 6115 XML_SetStartElementHandler(parser, start_element_event_handler); 6116 6117 g_biggestAlloc = 0; 6118 g_totalAlloc = 0; 6119 int offset = 0; 6120 // fill data until the big token is covered (but not necessarily parsed) 6121 while (offset < *leading + *bigtoken) { 6122 assert_true(offset + *fillsize <= document_length); 6123 const enum XML_Status status 6124 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6125 if (status != XML_STATUS_OK) { 6126 xml_failure(parser); 6127 } 6128 offset += *fillsize; 6129 } 6130 // Now, check that we've had a buffer allocation that could fit the 6131 // context bytes and our big token. In order to detect a special case, 6132 // we need to know how many bytes of our big token were included in the 6133 // first push that contained _any_ bytes of the big token: 6134 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); 6135 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { 6136 // Special case: we aren't saving any context, and the whole big token 6137 // was covered by a single fill, so Expat may have parsed directly 6138 // from our input pointer, without allocating an internal buffer. 
6139 } else if (*leading < XML_CONTEXT_BYTES) { 6140 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); 6141 } else { 6142 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); 6143 } 6144 // fill data until the big token is actually parsed 6145 while (storage.count < expected_elem_total) { 6146 const size_t alloc_before = g_totalAlloc; 6147 assert_true(offset + *fillsize <= document_length); 6148 const enum XML_Status status 6149 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6150 if (status != XML_STATUS_OK) { 6151 xml_failure(parser); 6152 } 6153 offset += *fillsize; 6154 // since all the bytes of the big token are already in the buffer, 6155 // the bufsize ceiling should make us finish its parsing without any 6156 // further buffer allocations. We assume that there will be no other 6157 // large allocations in this test. 6158 assert_true(g_totalAlloc - alloc_before < 4096); 6159 } 6160 // test-the-test: was our alloc even called? 6161 assert_true(g_totalAlloc > 0); 6162 // test-the-test: there shouldn't be any extra start elements 6163 assert_true(storage.count == expected_elem_total); 6164 6165 XML_ParserFree(parser); 6166 } 6167 } 6168 } 6169 free(document); 6170 } 6171 END_TEST 6172 6173 START_TEST(test_varying_buffer_fills) { 6174 const int KiB = 1024; 6175 const int MiB = 1024 * KiB; 6176 const int document_length = 16 * MiB; 6177 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB 6178 6179 if (g_chunkSize != 0) { 6180 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES(). 6181 } 6182 6183 char *const document = (char *)malloc(document_length); 6184 assert_true(document != NULL); 6185 memset(document, 'x', document_length); 6186 document[0] = '<'; 6187 document[1] = 't'; 6188 memset(&document[2], ' ', big - 2); // a very spacy token 6189 document[big - 1] = '>'; 6190 6191 // Each testcase is a list of buffer fill sizes, terminated by a value < 0. 
6192 // When reparse deferral is enabled, the final (negated) value is the expected 6193 // maximum number of bytes scanned in parse attempts. 6194 const int testcases[][30] = { 6195 {8 * MiB, -8 * MiB}, 6196 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total 6197 // zero-size fills shouldn't trigger the bypass 6198 {4 * MiB, 0, 4 * MiB, -12 * MiB}, 6199 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, 6200 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, 6201 // try to hit the buffer ceiling only once (at the end) 6202 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, 6203 // try to hit the same buffer ceiling multiple times 6204 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, 6205 6206 // try to hit every ceiling, by always landing 1K shy of the buffer size 6207 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, 6208 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, 6209 6210 // try to avoid every ceiling, by always landing 1B past the buffer size 6211 // the normal 2x heuristic threshold still forces parse attempts. 6212 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 6213 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 6214 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 6215 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 6216 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 6217 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 6218 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 6219 -(10 * MiB + 682 * KiB + 7)}, 6220 // try to avoid every ceiling again, except on our last fill. 
6221 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 6222 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 6223 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 6224 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 6225 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 6226 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 6227 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 6228 -(10 * MiB + 682 * KiB + 6)}, 6229 6230 // try to hit ceilings on the way multiple times 6231 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer 6232 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer 6233 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer 6234 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer 6235 // we'll make a parse attempt at every parse call 6236 -(45 * MiB + 12)}, 6237 }; 6238 const int testcount = sizeof(testcases) / sizeof(testcases[0]); 6239 for (int test_i = 0; test_i < testcount; test_i++) { 6240 const int *fillsize = testcases[test_i]; 6241 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1], 6242 fillsize[2], fillsize[3]); 6243 XML_Parser parser = XML_ParserCreate(NULL); 6244 assert_true(parser != NULL); 6245 6246 CharData storage; 6247 CharData_Init(&storage); 6248 XML_SetUserData(parser, &storage); 6249 XML_SetStartElementHandler(parser, start_element_event_handler); 6250 6251 g_bytesScanned = 0; 6252 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) 6253 int offset = 0; 6254 while (*fillsize >= 0) { 6255 assert_true(offset + *fillsize <= document_length); // or test is invalid 6256 const enum XML_Status status 6257 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6258 if (status != XML_STATUS_OK) { 6259 xml_failure(parser); 6260 } 6261 offset += *fillsize; 6262 fillsize++; 6263 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid 
overflow 6264 worstcase_bytes += offset; // we might've tried to parse all pending bytes 6265 } 6266 assert_true(storage.count == 1); // the big token should've been parsed 6267 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? 6268 if (g_reparseDeferralEnabledDefault) { 6269 // heuristic is enabled; some XML_Parse calls may have deferred reparsing 6270 const unsigned max_bytes_scanned = -*fillsize; 6271 if (g_bytesScanned > max_bytes_scanned) { 6272 fprintf(stderr, 6273 "bytes scanned in parse attempts: actual=%u limit=%u \n", 6274 g_bytesScanned, max_bytes_scanned); 6275 fail("too many bytes scanned in parse attempts"); 6276 } 6277 } 6278 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); 6279 6280 XML_ParserFree(parser); 6281 } 6282 free(document); 6283 } 6284 END_TEST 6285 6286 START_TEST(test_empty_ext_param_entity_in_value) { 6287 const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>"; 6288 ExtOption options[] = { 6289 {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">" 6290 "<!ENTITY ge \"%pe;\">"}, 6291 {XCS("empty"), ""}, 6292 {NULL, NULL}, 6293 }; 6294 6295 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 6296 XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); 6297 XML_SetUserData(g_parser, options); 6298 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 6299 == XML_STATUS_ERROR) 6300 xml_failure(g_parser); 6301 } 6302 END_TEST 6303 6304 void 6305 make_basic_test_case(Suite *s) { 6306 TCase *tc_basic = tcase_create("basic tests"); 6307 6308 suite_add_tcase(s, tc_basic); 6309 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 6310 6311 tcase_add_test(tc_basic, test_nul_byte); 6312 tcase_add_test(tc_basic, test_u0000_char); 6313 tcase_add_test(tc_basic, test_siphash_self); 6314 tcase_add_test(tc_basic, test_siphash_spec); 6315 tcase_add_test(tc_basic, test_bom_utf8); 6316 tcase_add_test(tc_basic, test_bom_utf16_be); 6317 tcase_add_test(tc_basic, 
test_bom_utf16_le); 6318 tcase_add_test(tc_basic, test_nobom_utf16_le); 6319 tcase_add_test(tc_basic, test_hash_collision); 6320 tcase_add_test(tc_basic, test_hash_salt_setter); 6321 tcase_add_test(tc_basic, test_illegal_utf8); 6322 tcase_add_test(tc_basic, test_utf8_auto_align); 6323 tcase_add_test(tc_basic, test_utf16); 6324 tcase_add_test(tc_basic, test_utf16_le_epilog_newline); 6325 tcase_add_test(tc_basic, test_not_utf16); 6326 tcase_add_test(tc_basic, test_bad_encoding); 6327 tcase_add_test(tc_basic, test_latin1_umlauts); 6328 tcase_add_test(tc_basic, test_long_utf8_character); 6329 tcase_add_test(tc_basic, test_long_latin1_attribute); 6330 tcase_add_test(tc_basic, test_long_ascii_attribute); 6331 /* Regression test for SF bug #491986. */ 6332 tcase_add_test(tc_basic, test_danish_latin1); 6333 /* Regression test for SF bug #514281. */ 6334 tcase_add_test(tc_basic, test_french_charref_hexidecimal); 6335 tcase_add_test(tc_basic, test_french_charref_decimal); 6336 tcase_add_test(tc_basic, test_french_latin1); 6337 tcase_add_test(tc_basic, test_french_utf8); 6338 tcase_add_test(tc_basic, test_utf8_false_rejection); 6339 tcase_add_test(tc_basic, test_line_number_after_parse); 6340 tcase_add_test(tc_basic, test_column_number_after_parse); 6341 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); 6342 tcase_add_test(tc_basic, test_line_number_after_error); 6343 tcase_add_test(tc_basic, test_column_number_after_error); 6344 tcase_add_test(tc_basic, test_really_long_lines); 6345 tcase_add_test(tc_basic, test_really_long_encoded_lines); 6346 tcase_add_test(tc_basic, test_end_element_events); 6347 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized); 6348 tcase_add_test(tc_basic, test_attr_whitespace_normalization); 6349 tcase_add_test(tc_basic, test_xmldecl_misplaced); 6350 tcase_add_test(tc_basic, test_xmldecl_invalid); 6351 tcase_add_test(tc_basic, test_xmldecl_missing_attr); 6352 tcase_add_test(tc_basic, test_xmldecl_missing_value); 6353 
tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity); 6354 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); 6355 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); 6356 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); 6357 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); 6358 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding); 6359 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2); 6360 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset); 6361 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset); 6362 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); 6363 tcase_add_test(tc_basic, 6364 test_wfc_undeclared_entity_with_external_subset_standalone); 6365 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); 6366 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); 6367 tcase_add_test(tc_basic, test_not_standalone_handler_reject); 6368 tcase_add_test(tc_basic, test_not_standalone_handler_accept); 6369 tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one); 6370 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs); 6371 tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs); 6372 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse); 6373 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling); 6374 tcase_add_test(tc_basic, test_dtd_attr_handling); 6375 tcase_add_test(tc_basic, test_empty_ns_without_namespaces); 6376 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); 6377 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); 6378 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); 6379 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); 6380 tcase_add_test(tc_basic, 
test_good_cdata_ascii); 6381 tcase_add_test(tc_basic, test_good_cdata_utf16); 6382 tcase_add_test(tc_basic, test_good_cdata_utf16_le); 6383 tcase_add_test(tc_basic, test_long_cdata_utf16); 6384 tcase_add_test(tc_basic, test_multichar_cdata_utf16); 6385 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); 6386 tcase_add_test(tc_basic, test_bad_cdata); 6387 tcase_add_test(tc_basic, test_bad_cdata_utf16); 6388 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); 6389 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); 6390 tcase_add_test(tc_basic, test_memory_allocation); 6391 tcase_add_test__if_xml_ge(tc_basic, test_default_current); 6392 tcase_add_test(tc_basic, test_dtd_elements); 6393 tcase_add_test(tc_basic, test_dtd_elements_nesting); 6394 tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); 6395 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); 6396 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); 6397 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype); 6398 tcase_add_test__ifdef_xml_dtd(tc_basic, 6399 test_foreign_dtd_without_external_subset); 6400 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd); 6401 tcase_add_test(tc_basic, test_set_base); 6402 tcase_add_test(tc_basic, test_attributes); 6403 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity); 6404 tcase_add_test(tc_basic, test_resume_invalid_parse); 6405 tcase_add_test(tc_basic, test_resume_resuspended); 6406 tcase_add_test(tc_basic, test_cdata_default); 6407 tcase_add_test(tc_basic, test_subordinate_reset); 6408 tcase_add_test(tc_basic, test_subordinate_suspend); 6409 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend); 6410 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort); 6411 tcase_add_test__ifdef_xml_dtd(tc_basic, 6412 test_ext_entity_invalid_suspended_parse); 6413 tcase_add_test(tc_basic, test_explicit_encoding); 6414 tcase_add_test(tc_basic, 
test_trailing_cr); 6415 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr); 6416 tcase_add_test(tc_basic, test_trailing_rsqb); 6417 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb); 6418 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata); 6419 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); 6420 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); 6421 tcase_add_test(tc_basic, test_empty_parse); 6422 tcase_add_test(tc_basic, test_negative_len_parse); 6423 tcase_add_test(tc_basic, test_negative_len_parse_buffer); 6424 tcase_add_test(tc_basic, test_get_buffer_1); 6425 tcase_add_test(tc_basic, test_get_buffer_2); 6426 #if XML_CONTEXT_BYTES > 0 6427 tcase_add_test(tc_basic, test_get_buffer_3_overflow); 6428 #endif 6429 tcase_add_test(tc_basic, test_buffer_can_grow_to_max); 6430 tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); 6431 tcase_add_test(tc_basic, test_byte_info_at_end); 6432 tcase_add_test(tc_basic, test_byte_info_at_error); 6433 tcase_add_test(tc_basic, test_byte_info_at_cdata); 6434 tcase_add_test(tc_basic, test_predefined_entities); 6435 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd); 6436 tcase_add_test(tc_basic, test_not_predefined_entities); 6437 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section); 6438 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); 6439 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); 6440 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); 6441 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); 6442 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); 6443 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); 6444 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); 6445 tcase_add_test(tc_basic, test_bad_public_doctype); 6446 tcase_add_test(tc_basic, test_attribute_enum_value); 6447 
tcase_add_test(tc_basic, test_predefined_entity_redefinition); 6448 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing); 6449 tcase_add_test(tc_basic, test_public_notation_no_sysid); 6450 tcase_add_test(tc_basic, test_nested_groups); 6451 tcase_add_test(tc_basic, test_group_choice); 6452 tcase_add_test(tc_basic, test_standalone_parameter_entity); 6453 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); 6454 tcase_add_test__ifdef_xml_dtd(tc_basic, 6455 test_recursive_external_parameter_entity); 6456 tcase_add_test__ifdef_xml_dtd(tc_basic, 6457 test_recursive_external_parameter_entity_2); 6458 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); 6459 tcase_add_test(tc_basic, test_suspend_xdecl); 6460 tcase_add_test(tc_basic, test_abort_epilog); 6461 tcase_add_test(tc_basic, test_abort_epilog_2); 6462 tcase_add_test(tc_basic, test_suspend_epilog); 6463 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag); 6464 tcase_add_test(tc_basic, test_unfinished_epilog); 6465 tcase_add_test(tc_basic, test_partial_char_in_epilog); 6466 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity); 6467 tcase_add_test__ifdef_xml_dtd(tc_basic, 6468 test_suspend_resume_internal_entity_issue_629); 6469 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error); 6470 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity); 6471 tcase_add_test(tc_basic, test_restart_on_error); 6472 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); 6473 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); 6474 tcase_add_test(tc_basic, test_trailing_cr_in_att_value); 6475 tcase_add_test(tc_basic, test_standalone_internal_entity); 6476 tcase_add_test(tc_basic, test_skipped_external_entity); 6477 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); 6478 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); 6479 tcase_add_test__ifdef_xml_dtd(tc_basic, 
test_param_entity_with_trailing_cr); 6480 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity); 6481 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2); 6482 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3); 6483 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4); 6484 tcase_add_test(tc_basic, test_pi_handled_in_default); 6485 tcase_add_test(tc_basic, test_comment_handled_in_default); 6486 tcase_add_test(tc_basic, test_pi_yml); 6487 tcase_add_test(tc_basic, test_pi_xnl); 6488 tcase_add_test(tc_basic, test_pi_xmm); 6489 tcase_add_test(tc_basic, test_utf16_pi); 6490 tcase_add_test(tc_basic, test_utf16_be_pi); 6491 tcase_add_test(tc_basic, test_utf16_be_comment); 6492 tcase_add_test(tc_basic, test_utf16_le_comment); 6493 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); 6494 tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); 6495 tcase_add_test(tc_basic, test_unknown_encoding_success); 6496 tcase_add_test(tc_basic, test_unknown_encoding_bad_name); 6497 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); 6498 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); 6499 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); 6500 tcase_add_test(tc_basic, test_invalid_unknown_encoding); 6501 tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); 6502 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); 6503 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); 6504 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); 6505 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); 6506 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); 6507 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); 6508 tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary); 6509 tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary); 6510 tcase_add_test__if_xml_ge(tc_basic, 
test_ext_entity_latin1_utf16le_bom); 6511 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); 6512 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); 6513 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2); 6514 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be); 6515 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le); 6516 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown); 6517 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom); 6518 tcase_add_test(tc_basic, test_utf8_in_cdata_section); 6519 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); 6520 tcase_add_test(tc_basic, test_utf8_in_start_tags); 6521 tcase_add_test(tc_basic, test_trailing_spaces_in_elements); 6522 tcase_add_test(tc_basic, test_utf16_attribute); 6523 tcase_add_test(tc_basic, test_utf16_second_attr); 6524 tcase_add_test(tc_basic, test_attr_after_solidus); 6525 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe); 6526 tcase_add_test(tc_basic, test_bad_attr_desc_keyword); 6527 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); 6528 tcase_add_test(tc_basic, test_bad_doctype); 6529 tcase_add_test(tc_basic, test_bad_doctype_utf8); 6530 tcase_add_test(tc_basic, test_bad_doctype_utf16); 6531 tcase_add_test(tc_basic, test_bad_doctype_plus); 6532 tcase_add_test(tc_basic, test_bad_doctype_star); 6533 tcase_add_test(tc_basic, test_bad_doctype_query); 6534 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore); 6535 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); 6536 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); 6537 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be); 6538 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le); 6539 tcase_add_test(tc_basic, test_short_doctype); 6540 tcase_add_test(tc_basic, test_short_doctype_2); 6541 tcase_add_test(tc_basic, test_short_doctype_3); 6542 
tcase_add_test(tc_basic, test_long_doctype); 6543 tcase_add_test(tc_basic, test_bad_entity); 6544 tcase_add_test(tc_basic, test_bad_entity_2); 6545 tcase_add_test(tc_basic, test_bad_entity_3); 6546 tcase_add_test(tc_basic, test_bad_entity_4); 6547 tcase_add_test(tc_basic, test_bad_notation); 6548 tcase_add_test(tc_basic, test_default_doctype_handler); 6549 tcase_add_test(tc_basic, test_empty_element_abort); 6550 tcase_add_test__ifdef_xml_dtd(tc_basic, 6551 test_pool_integrity_with_unfinished_attr); 6552 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value); 6553 tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements); 6554 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity); 6555 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity); 6556 tcase_add_test__if_xml_ge(tc_basic, 6557 test_deep_nested_entity_delayed_interpretation); 6558 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); 6559 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2); 6560 tcase_add_test(tc_basic, test_big_tokens_scale_linearly); 6561 tcase_add_test(tc_basic, test_set_reparse_deferral); 6562 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); 6563 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); 6564 tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); 6565 tcase_add_test(tc_basic, test_set_bad_reparse_option); 6566 tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); 6567 tcase_add_test(tc_basic, test_varying_buffer_fills); 6568 } 6569