/* Tests in the "basic" test case for the Expat test suite
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2017      Joe Orton <jorton@redhat.com>
   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
   Copyright (c) 2024-2026 Berkay Eren Ürün <berkay.ueruen@siemens.com>
   Copyright (c) 2026      Francesco Bertolaccini
   Copyright (c) 2026      Matthew Fernandez <matthew.fernandez@gmail.com>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to  any  person  obtaining
   a  copy  of  this  software   and   associated   documentation files (the
   "Software"),  to  deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.
37 38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 39 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 40 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 41 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 42 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 43 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 44 USE OR OTHER DEALINGS IN THE SOFTWARE. 45 */ 46 47 #if defined(NDEBUG) 48 # undef NDEBUG /* because test suite relies on assert(...) at the moment */ 49 #endif 50 51 #include "expat_config.h" 52 53 #include <assert.h> 54 55 #include <stdbool.h> 56 #include <stdio.h> 57 #include <string.h> 58 #include <time.h> 59 60 #include "expat.h" 61 #include "internal.h" 62 #include "minicheck.h" 63 #include "structdata.h" 64 #include "common.h" 65 #include "dummy.h" 66 #include "handlers.h" 67 #include "siphash.h" 68 #include "basic_tests.h" 69 70 static void 71 basic_setup(void) { 72 g_parser = XML_ParserCreate(NULL); 73 if (g_parser == NULL) 74 fail("Parser not created."); 75 } 76 77 /* 78 * Character & encoding tests. 79 */ 80 81 START_TEST(test_nul_byte) { 82 char text[] = "<doc>\0</doc>"; 83 84 /* test that a NUL byte (in US-ASCII data) is an error */ 85 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 86 == XML_STATUS_OK) 87 fail("Parser did not report error on NUL-byte."); 88 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 89 xml_failure(g_parser); 90 } 91 END_TEST 92 93 START_TEST(test_u0000_char) { 94 /* test that a NUL byte (in US-ASCII data) is an error */ 95 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF, 96 "Parser did not report error on NUL-byte."); 97 } 98 END_TEST 99 100 START_TEST(test_siphash_self) { 101 if (! 
sip24_valid()) 102 fail("SipHash self-test failed"); 103 } 104 END_TEST 105 106 START_TEST(test_siphash_spec) { 107 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */ 108 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 109 "\x0a\x0b\x0c\x0d\x0e"; 110 const size_t len = sizeof(message) - 1; 111 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U); 112 struct siphash state; 113 struct sipkey key; 114 115 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 116 "\x0a\x0b\x0c\x0d\x0e\x0f"); 117 sip24_init(&state, &key); 118 119 /* Cover spread across calls */ 120 sip24_update(&state, message, 4); 121 sip24_update(&state, message + 4, len - 4); 122 123 /* Cover null length */ 124 sip24_update(&state, message, 0); 125 126 if (sip24_final(&state) != expected) 127 fail("sip24_final failed spec test\n"); 128 129 /* Cover wrapper */ 130 if (siphash24(message, len, &key) != expected) 131 fail("siphash24 failed spec test\n"); 132 } 133 END_TEST 134 135 START_TEST(test_bom_utf8) { 136 /* This test is really just making sure we don't core on a UTF-8 BOM. */ 137 const char *text = "\357\273\277<e/>"; 138 139 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 140 == XML_STATUS_ERROR) 141 xml_failure(g_parser); 142 } 143 END_TEST 144 145 START_TEST(test_bom_utf16_be) { 146 char text[] = "\376\377\0<\0e\0/\0>"; 147 148 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 149 == XML_STATUS_ERROR) 150 xml_failure(g_parser); 151 } 152 END_TEST 153 154 START_TEST(test_bom_utf16_le) { 155 char text[] = "\377\376<\0e\0/\0>\0"; 156 157 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 158 == XML_STATUS_ERROR) 159 xml_failure(g_parser); 160 } 161 END_TEST 162 163 START_TEST(test_nobom_utf16_le) { 164 char text[] = " \0<\0e\0/\0>\0"; 165 166 if (g_chunkSize == 1) { 167 // TODO: with just the first byte, we can't tell the difference between 168 // UTF-16-LE and UTF-8. 
Avoid the failure for now. 169 return; 170 } 171 172 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 173 == XML_STATUS_ERROR) 174 xml_failure(g_parser); 175 } 176 END_TEST 177 178 START_TEST(test_hash_collision) { 179 /* For full coverage of the lookup routine, we need to ensure a 180 * hash collision even though we can only tell that we have one 181 * through breakpoint debugging or coverage statistics. The 182 * following will cause a hash collision on machines with a 64-bit 183 * long type; others will have to experiment. The full coverage 184 * tests invoked from qa.sh usually provide a hash collision, but 185 * not always. This is an attempt to provide insurance. 186 */ 187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U) 188 const char *text 189 = "<doc>\n" 190 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n" 191 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n" 192 "<b5></b5><b6></b6><b7></b7><b8></b8>\n" 193 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n" 194 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n" 195 "<d8>This triggers the table growth and collides with b2</d8>\n" 196 "</doc>\n"; 197 198 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT); 199 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 200 == XML_STATUS_ERROR) 201 xml_failure(g_parser); 202 } 203 END_TEST 204 #undef COLLIDING_HASH_SALT 205 206 START_TEST(test_hash_salt_setter) { 207 const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7', 208 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; 209 XML_Parser parser = XML_ParserCreate(NULL); 210 211 // NULL parser should be rejected 212 assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE); 213 214 // NULL entropy should be rejected 215 assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE); 216 217 // Setting should be allowed more than once 218 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE); 219 
assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE); 220 221 // But not after parsing has started 222 assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */) 223 == XML_STATUS_OK); 224 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE); 225 226 XML_ParserFree(parser); 227 } 228 END_TEST 229 230 /* Regression test for SF bug #491986. */ 231 START_TEST(test_danish_latin1) { 232 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 233 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>"; 234 #ifdef XML_UNICODE 235 const XML_Char *expected 236 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5"); 237 #else 238 const XML_Char *expected 239 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); 240 #endif 241 run_character_check(text, expected); 242 } 243 END_TEST 244 245 /* Regression test for SF bug #514281. */ 246 START_TEST(test_french_charref_hexidecimal) { 247 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 248 "<doc>éèàçêÈ</doc>"; 249 #ifdef XML_UNICODE 250 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 251 #else 252 const XML_Char *expected 253 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 254 #endif 255 run_character_check(text, expected); 256 } 257 END_TEST 258 259 START_TEST(test_french_charref_decimal) { 260 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 261 "<doc>éèàçêÈ</doc>"; 262 #ifdef XML_UNICODE 263 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 264 #else 265 const XML_Char *expected 266 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 267 #endif 268 run_character_check(text, expected); 269 } 270 END_TEST 271 272 START_TEST(test_french_latin1) { 273 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 274 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>"; 275 #ifdef XML_UNICODE 276 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 277 #else 278 const XML_Char 
*expected 279 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 280 #endif 281 run_character_check(text, expected); 282 } 283 END_TEST 284 285 START_TEST(test_french_utf8) { 286 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 287 "<doc>\xC3\xA9</doc>"; 288 #ifdef XML_UNICODE 289 const XML_Char *expected = XCS("\x00e9"); 290 #else 291 const XML_Char *expected = XCS("\xC3\xA9"); 292 #endif 293 run_character_check(text, expected); 294 } 295 END_TEST 296 297 /* Regression test for SF bug #600479. 298 XXX There should be a test that exercises all legal XML Unicode 299 characters as PCDATA and attribute value content, and XML Name 300 characters as part of element and attribute names. 301 */ 302 START_TEST(test_utf8_false_rejection) { 303 const char *text = "<doc>\xEF\xBA\xBF</doc>"; 304 #ifdef XML_UNICODE 305 const XML_Char *expected = XCS("\xfebf"); 306 #else 307 const XML_Char *expected = XCS("\xEF\xBA\xBF"); 308 #endif 309 run_character_check(text, expected); 310 } 311 END_TEST 312 313 /* Regression test for SF bug #477667. 314 This test assures that any 8-bit character followed by a 7-bit 315 character will not be mistakenly interpreted as a valid UTF-8 316 sequence. 317 */ 318 START_TEST(test_illegal_utf8) { 319 char text[100]; 320 int i; 321 322 for (i = 128; i <= 255; ++i) { 323 snprintf(text, sizeof(text), "<e>%ccd</e>", i); 324 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 325 == XML_STATUS_OK) { 326 snprintf(text, sizeof(text), 327 "expected token error for '%c' (ordinal %d) in UTF-8 text", i, 328 i); 329 fail(text); 330 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 331 xml_failure(g_parser); 332 /* Reset the parser since we use the same parser repeatedly. 
*/ 333 XML_ParserReset(g_parser, NULL); 334 } 335 } 336 END_TEST 337 338 /* Examples, not masks: */ 339 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ 340 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ 341 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */ 342 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ 343 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */ 344 345 START_TEST(test_utf8_auto_align) { 346 struct TestCase { 347 ptrdiff_t expectedMovementInChars; 348 const char *input; 349 }; 350 351 struct TestCase cases[] = { 352 {00, ""}, 353 354 {00, UTF8_LEAD_1}, 355 356 {-1, UTF8_LEAD_2}, 357 {00, UTF8_LEAD_2 UTF8_FOLLOW}, 358 359 {-1, UTF8_LEAD_3}, 360 {-2, UTF8_LEAD_3 UTF8_FOLLOW}, 361 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, 362 363 {-1, UTF8_LEAD_4}, 364 {-2, UTF8_LEAD_4 UTF8_FOLLOW}, 365 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, 366 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, 367 }; 368 369 size_t i = 0; 370 bool success = true; 371 for (; i < sizeof(cases) / sizeof(*cases); i++) { 372 const char *fromLim = cases[i].input + strlen(cases[i].input); 373 const char *const fromLimInitially = fromLim; 374 ptrdiff_t actualMovementInChars; 375 376 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim); 377 378 actualMovementInChars = (fromLim - fromLimInitially); 379 if (actualMovementInChars != cases[i].expectedMovementInChars) { 380 size_t j = 0; 381 success = false; 382 printf("[-] UTF-8 case %2u: Expected movement by %2d chars" 383 ", actually moved by %2d chars: \"", 384 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars, 385 (int)actualMovementInChars); 386 for (; j < strlen(cases[i].input); j++) { 387 printf("\\x%02x", (unsigned char)cases[i].input[j]); 388 } 389 printf("\"\n"); 390 } 391 } 392 393 if (! 
success) { 394 fail("UTF-8 auto-alignment is not bullet-proof\n"); 395 } 396 } 397 END_TEST 398 399 START_TEST(test_utf16) { 400 /* <?xml version="1.0" encoding="UTF-16"?> 401 * <doc a='123'>some {A} text</doc> 402 * 403 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A 404 */ 405 char text[] 406 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" 407 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" 408 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" 409 "\000'\000?\000>\000\n" 410 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" 411 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" 412 "<\000/\000d\000o\000c\000>"; 413 #ifdef XML_UNICODE 414 const XML_Char *expected = XCS("some \xff21 text"); 415 #else 416 const XML_Char *expected = XCS("some \357\274\241 text"); 417 #endif 418 CharData storage; 419 420 CharData_Init(&storage); 421 XML_SetUserData(g_parser, &storage); 422 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 423 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 424 == XML_STATUS_ERROR) 425 xml_failure(g_parser); 426 CharData_CheckXMLChars(&storage, expected); 427 } 428 END_TEST 429 430 START_TEST(test_utf16_le_epilog_newline) { 431 unsigned int first_chunk_bytes = 17; 432 char text[] = "\xFF\xFE" /* BOM */ 433 "<\000e\000/\000>\000" /* document element */ 434 "\r\000\n\000\r\000\n\000"; /* epilog */ 435 436 if (first_chunk_bytes >= sizeof(text) - 1) 437 fail("bad value of first_chunk_bytes"); 438 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE) 439 == XML_STATUS_ERROR) 440 xml_failure(g_parser); 441 else { 442 enum XML_Status rc; 443 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes, 444 (int)(sizeof(text) - first_chunk_bytes - 1), 445 XML_TRUE); 446 if (rc == XML_STATUS_ERROR) 447 xml_failure(g_parser); 448 } 449 } 450 END_TEST 451 452 /* Test that an outright lie in the encoding is 
faulted */ 453 START_TEST(test_not_utf16) { 454 const char *text = "<?xml version='1.0' encoding='utf-16'?>" 455 "<doc>Hi</doc>"; 456 457 /* Use a handler to provoke the appropriate code paths */ 458 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler); 459 expect_failure(text, XML_ERROR_INCORRECT_ENCODING, 460 "UTF-16 declared in UTF-8 not faulted"); 461 } 462 END_TEST 463 464 /* Test that an unknown encoding is rejected */ 465 START_TEST(test_bad_encoding) { 466 const char *text = "<doc>Hi</doc>"; 467 468 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding"))) 469 fail("XML_SetEncoding failed"); 470 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 471 "Unknown encoding not faulted"); 472 } 473 END_TEST 474 475 /* Regression test for SF bug #481609, #774028. */ 476 START_TEST(test_latin1_umlauts) { 477 const char *text 478 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 479 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n" 480 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>"; 481 #ifdef XML_UNICODE 482 /* Expected results in UTF-16 */ 483 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ") 484 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >"); 485 #else 486 /* Expected results in UTF-8 */ 487 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") 488 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >"); 489 #endif 490 491 run_character_check(text, expected); 492 XML_ParserReset(g_parser, NULL); 493 run_attribute_check(text, expected); 494 /* Repeat with a default handler */ 495 XML_ParserReset(g_parser, NULL); 496 XML_SetDefaultHandler(g_parser, dummy_default_handler); 497 run_character_check(text, expected); 498 XML_ParserReset(g_parser, NULL); 499 XML_SetDefaultHandler(g_parser, dummy_default_handler); 500 run_attribute_check(text, expected); 501 } 502 END_TEST 503 504 /* Test that an element name with a 4-byte UTF-8 character is rejected */ 505 START_TEST(test_long_utf8_character) { 506 const char *text 507 = "<?xml version='1.0' 
encoding='utf-8'?>\n" 508 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ 509 "<do\xf0\x90\x80\x80/>"; 510 expect_failure(text, XML_ERROR_INVALID_TOKEN, 511 "4-byte UTF-8 character in element name not faulted"); 512 } 513 END_TEST 514 515 /* Test that a long latin-1 attribute (too long to convert in one go) 516 * is correctly converted 517 */ 518 START_TEST(test_long_latin1_attribute) { 519 const char *text 520 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 521 "<doc att='" 522 /* 64 characters per line */ 523 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 524 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 525 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 526 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 527 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 528 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 529 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 530 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 531 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 532 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 533 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 534 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 535 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 536 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 537 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 538 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" 539 /* Last character splits across a buffer boundary */ 540 "\xe4'>\n</doc>"; 541 542 const XML_Char *expected = 543 /* 64 characters per line */ 544 /* clang-format off */ 545 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 546 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 547 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 548 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 549 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 550 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 551 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 552 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 553 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 554 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 555 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 556 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 557 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 558 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 559 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 560 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO") 561 /* clang-format on */ 562 #ifdef XML_UNICODE 563 XCS("\x00e4"); 564 #else 565 XCS("\xc3\xa4"); 566 #endif 567 568 run_attribute_check(text, expected); 569 } 570 END_TEST 571 572 /* Test that a long ASCII attribute (too long to convert in one go) 573 * is correctly converted 574 */ 575 START_TEST(test_long_ascii_attribute) { 576 const char *text 577 = "<?xml version='1.0' encoding='us-ascii'?>\n" 578 "<doc att='" 579 /* 64 characters per line */ 580 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 581 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 582 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 583 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 584 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 585 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 586 
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 587 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 588 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 589 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 590 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 591 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 592 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 593 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 594 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 595 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 596 "01234'>\n</doc>"; 597 const XML_Char *expected = 598 /* 64 characters per line */ 599 /* clang-format off */ 600 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 601 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 604 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 605 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 606 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 607 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 608 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 609 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 610 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 611 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 612 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 613 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 614 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 615 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 616 XCS("01234"); 617 /* clang-format on */ 618 619 run_attribute_check(text, expected); 620 } 621 END_TEST 622 623 /* Regression test #1 for SF bug #653180. */ 624 START_TEST(test_line_number_after_parse) { 625 const char *text = "<tag>\n" 626 "\n" 627 "\n</tag>"; 628 XML_Size lineno; 629 630 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 631 == XML_STATUS_ERROR) 632 xml_failure(g_parser); 633 lineno = XML_GetCurrentLineNumber(g_parser); 634 if (lineno != 4) { 635 char buffer[100]; 636 snprintf(buffer, sizeof(buffer), 637 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); 638 fail(buffer); 639 } 640 } 641 END_TEST 642 643 /* Regression test #2 for SF bug #653180. */ 644 START_TEST(test_column_number_after_parse) { 645 const char *text = "<tag></tag>"; 646 XML_Size colno; 647 648 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 649 == XML_STATUS_ERROR) 650 xml_failure(g_parser); 651 colno = XML_GetCurrentColumnNumber(g_parser); 652 if (colno != 11) { 653 char buffer[100]; 654 snprintf(buffer, sizeof(buffer), 655 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); 656 fail(buffer); 657 } 658 } 659 END_TEST 660 661 /* Regression test #3 for SF bug #653180. 
*/ 662 START_TEST(test_line_and_column_numbers_inside_handlers) { 663 const char *text = "<a>\n" /* Unix end-of-line */ 664 " <b>\r\n" /* Windows end-of-line */ 665 " <c/>\r" /* Mac OS end-of-line */ 666 " </b>\n" 667 " <d>\n" 668 " <f/>\n" 669 " </d>\n" 670 "</a>"; 671 const StructDataEntry expected[] 672 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG}, 673 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG}, 674 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG}, 675 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG}, 676 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}}; 677 const int expected_count = sizeof(expected) / sizeof(StructDataEntry); 678 StructData storage; 679 680 StructData_Init(&storage); 681 XML_SetUserData(g_parser, &storage); 682 XML_SetStartElementHandler(g_parser, start_element_event_handler2); 683 XML_SetEndElementHandler(g_parser, end_element_event_handler2); 684 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 685 == XML_STATUS_ERROR) 686 xml_failure(g_parser); 687 688 StructData_CheckItems(&storage, expected, expected_count); 689 StructData_Dispose(&storage); 690 } 691 END_TEST 692 693 /* Regression test #4 for SF bug #653180. */ 694 START_TEST(test_line_number_after_error) { 695 const char *text = "<a>\n" 696 " <b>\n" 697 " </a>"; /* missing </b> */ 698 XML_Size lineno; 699 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 700 != XML_STATUS_ERROR) 701 fail("Expected a parse error"); 702 703 lineno = XML_GetCurrentLineNumber(g_parser); 704 if (lineno != 3) { 705 char buffer[100]; 706 snprintf(buffer, sizeof(buffer), 707 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); 708 fail(buffer); 709 } 710 } 711 END_TEST 712 713 /* Regression test #5 for SF bug #653180. 
*/ 714 START_TEST(test_column_number_after_error) { 715 const char *text = "<a>\n" 716 " <b>\n" 717 " </a>"; /* missing </b> */ 718 XML_Size colno; 719 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 720 != XML_STATUS_ERROR) 721 fail("Expected a parse error"); 722 723 colno = XML_GetCurrentColumnNumber(g_parser); 724 if (colno != 4) { 725 char buffer[100]; 726 snprintf(buffer, sizeof(buffer), 727 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); 728 fail(buffer); 729 } 730 } 731 END_TEST 732 733 /* Regression test for SF bug #478332. */ 734 START_TEST(test_really_long_lines) { 735 /* This parses an input line longer than INIT_DATA_BUF_SIZE 736 characters long (defined to be 1024 in xmlparse.c). We take a 737 really cheesy approach to building the input buffer, because 738 this avoids writing bugs in buffer-filling code. 739 */ 740 const char *text 741 = "<e>" 742 /* 64 chars */ 743 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 744 /* until we have at least 1024 characters on the line: */ 745 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 746 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 747 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 748 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 749 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 750 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 751 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 752 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 753 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 754 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 758 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 761 "</e>"; 762 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 763 == XML_STATUS_ERROR) 764 xml_failure(g_parser); 765 } 766 END_TEST 767 768 /* Test cdata processing across a buffer boundary */ 769 START_TEST(test_really_long_encoded_lines) { 770 /* As above, except that we want to provoke an output buffer 771 * overflow with a non-trivial encoding. For this we need to pass 772 * the whole cdata in one go, not byte-by-byte. 773 */ 774 void *buffer; 775 const char *text 776 = "<?xml version='1.0' encoding='iso-8859-1'?>" 777 "<e>" 778 /* 64 chars */ 779 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 780 /* until we have at least 1024 characters on the line: */ 781 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 782 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 783 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 784 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 785 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 786 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 787 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 788 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 789 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 790 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 791 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 792 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 793 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 794 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 795 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 796 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 797 "</e>"; 798 int parse_len = (int)strlen(text); 799 800 /* Need a cdata handler to provoke the code path we want to test */ 801 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler); 802 buffer = XML_GetBuffer(g_parser, parse_len); 803 if (buffer == NULL) 804 fail("Could not allocate parse buffer"); 805 assert(buffer != NULL); 806 memcpy(buffer, text, parse_len); 807 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) 808 xml_failure(g_parser); 809 } 810 END_TEST 811 812 /* 813 * Element event tests. 814 */ 815 816 START_TEST(test_end_element_events) { 817 const char *text = "<a><b><c/></b><d><f/></d></a>"; 818 const XML_Char *expected = XCS("/c/b/f/d/a"); 819 CharData storage; 820 821 CharData_Init(&storage); 822 XML_SetUserData(g_parser, &storage); 823 XML_SetEndElementHandler(g_parser, end_element_event_handler); 824 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 825 == XML_STATUS_ERROR) 826 xml_failure(g_parser); 827 CharData_CheckXMLChars(&storage, expected); 828 } 829 END_TEST 830 831 /* 832 * Attribute tests. 833 */ 834 835 /* Helper used by the following tests; this checks any "attr" and "refs" 836 attributes to make sure whitespace has been normalized. 837 838 Return true if whitespace has been normalized in a string, using 839 the rules for attribute value normalization. The 'is_cdata' flag 840 is needed since CDATA attributes don't need to have multiple 841 whitespace characters collapsed to a single space, while other 842 attribute data types do. (Section 3.3.3 of the recommendation.) 843 */ 844 static int 845 is_whitespace_normalized(const XML_Char *s, int is_cdata) { 846 int blanks = 0; 847 int at_start = 1; 848 while (*s) { 849 if (*s == XCS(' ')) 850 ++blanks; 851 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r')) 852 return 0; 853 else { 854 if (at_start) { 855 at_start = 0; 856 if (blanks && ! 
is_cdata) 857 /* illegal leading blanks */ 858 return 0; 859 } else if (blanks > 1 && ! is_cdata) 860 return 0; 861 blanks = 0; 862 } 863 ++s; 864 } 865 if (blanks && ! is_cdata) 866 return 0; 867 return 1; 868 } 869 870 /* Check the attribute whitespace checker: */ 871 START_TEST(test_helper_is_whitespace_normalized) { 872 assert(is_whitespace_normalized(XCS("abc"), 0)); 873 assert(is_whitespace_normalized(XCS("abc"), 1)); 874 assert(is_whitespace_normalized(XCS("abc def ghi"), 0)); 875 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 876 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0)); 877 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1)); 878 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0)); 879 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 880 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0)); 881 assert(is_whitespace_normalized(XCS("abc def ghi "), 1)); 882 assert(! is_whitespace_normalized(XCS(" "), 0)); 883 assert(is_whitespace_normalized(XCS(" "), 1)); 884 assert(! is_whitespace_normalized(XCS("\t"), 0)); 885 assert(! is_whitespace_normalized(XCS("\t"), 1)); 886 assert(! is_whitespace_normalized(XCS("\n"), 0)); 887 assert(! is_whitespace_normalized(XCS("\n"), 1)); 888 assert(! is_whitespace_normalized(XCS("\r"), 0)); 889 assert(! is_whitespace_normalized(XCS("\r"), 1)); 890 assert(! is_whitespace_normalized(XCS("abc\t def"), 1)); 891 } 892 END_TEST 893 894 static void XMLCALL 895 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, 896 const XML_Char **atts) { 897 int i; 898 UNUSED_P(userData); 899 UNUSED_P(name); 900 for (i = 0; atts[i] != NULL; i += 2) { 901 const XML_Char *attrname = atts[i]; 902 const XML_Char *value = atts[i + 1]; 903 if (xcstrcmp(XCS("attr"), attrname) == 0 904 || xcstrcmp(XCS("ents"), attrname) == 0 905 || xcstrcmp(XCS("refs"), attrname) == 0) { 906 if (! 
is_whitespace_normalized(value, 0)) { 907 char buffer[256]; 908 snprintf(buffer, sizeof(buffer), 909 "attribute value not normalized: %" XML_FMT_STR 910 "='%" XML_FMT_STR "'", 911 attrname, value); 912 fail(buffer); 913 } 914 } 915 } 916 } 917 918 START_TEST(test_attr_whitespace_normalization) { 919 const char *text 920 = "<!DOCTYPE doc [\n" 921 " <!ATTLIST doc\n" 922 " attr NMTOKENS #REQUIRED\n" 923 " ents ENTITIES #REQUIRED\n" 924 " refs IDREFS #REQUIRED>\n" 925 "]>\n" 926 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n" 927 " ents=' ent-1 \t\r\n" 928 " ent-2 ' >\n" 929 " <e id='id-1'/>\n" 930 " <e id='id-2'/>\n" 931 "</doc>"; 932 933 XML_SetStartElementHandler(g_parser, 934 check_attr_contains_normalized_whitespace); 935 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 936 == XML_STATUS_ERROR) 937 xml_failure(g_parser); 938 } 939 END_TEST 940 941 /* 942 * XML declaration tests. 943 */ 944 945 START_TEST(test_xmldecl_misplaced) { 946 expect_failure("\n" 947 "<?xml version='1.0'?>\n" 948 "<a/>", 949 XML_ERROR_MISPLACED_XML_PI, 950 "failed to report misplaced XML declaration"); 951 } 952 END_TEST 953 954 START_TEST(test_xmldecl_invalid) { 955 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL, 956 "Failed to report invalid XML declaration"); 957 } 958 END_TEST 959 960 START_TEST(test_xmldecl_missing_attr) { 961 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL, 962 "Failed to report missing XML declaration attribute"); 963 } 964 END_TEST 965 966 START_TEST(test_xmldecl_missing_value) { 967 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n" 968 "<doc/>", 969 XML_ERROR_XML_DECL, 970 "Failed to report missing attribute value"); 971 } 972 END_TEST 973 974 /* Regression test for SF bug #584832. 
*/ 975 START_TEST(test_unknown_encoding_internal_entity) { 976 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 977 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 978 "<test a='&foo;'/>"; 979 980 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL); 981 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 982 == XML_STATUS_ERROR) 983 xml_failure(g_parser); 984 } 985 END_TEST 986 987 /* Test unrecognised encoding handler */ 988 START_TEST(test_unrecognised_encoding_internal_entity) { 989 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 990 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 991 "<test a='&foo;'/>"; 992 993 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL); 994 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 995 != XML_STATUS_ERROR) 996 fail("Unrecognised encoding not rejected"); 997 } 998 END_TEST 999 1000 /* Regression test for SF bug #620106. */ 1001 START_TEST(test_ext_entity_set_encoding) { 1002 const char *text = "<!DOCTYPE doc [\n" 1003 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1004 "]>\n" 1005 "<doc>&en;</doc>"; 1006 ExtTest test_data 1007 = {/* This text says it's an unsupported encoding, but it's really 1008 UTF-8, which we tell Expat using XML_SetEncoding(). 
1009 */ 1010 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL}; 1011 #ifdef XML_UNICODE 1012 const XML_Char *expected = XCS("\x00e9"); 1013 #else 1014 const XML_Char *expected = XCS("\xc3\xa9"); 1015 #endif 1016 1017 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1018 run_ext_character_check(text, &test_data, expected); 1019 } 1020 END_TEST 1021 1022 /* Test external entities with no handler */ 1023 START_TEST(test_ext_entity_no_handler) { 1024 const char *text = "<!DOCTYPE doc [\n" 1025 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1026 "]>\n" 1027 "<doc>&en;</doc>"; 1028 1029 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1030 run_character_check(text, XCS("")); 1031 } 1032 END_TEST 1033 1034 /* Test UTF-8 BOM is accepted */ 1035 START_TEST(test_ext_entity_set_bom) { 1036 const char *text = "<!DOCTYPE doc [\n" 1037 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1038 "]>\n" 1039 "<doc>&en;</doc>"; 1040 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */ 1041 "<?xml encoding='iso-8859-3'?>" 1042 "\xC3\xA9", 1043 XCS("utf-8"), NULL}; 1044 #ifdef XML_UNICODE 1045 const XML_Char *expected = XCS("\x00e9"); 1046 #else 1047 const XML_Char *expected = XCS("\xc3\xa9"); 1048 #endif 1049 1050 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1051 run_ext_character_check(text, &test_data, expected); 1052 } 1053 END_TEST 1054 1055 /* Test that bad encodings are faulted */ 1056 START_TEST(test_ext_entity_bad_encoding) { 1057 const char *text = "<!DOCTYPE doc [\n" 1058 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1059 "]>\n" 1060 "<doc>&en;</doc>"; 1061 ExtFaults fault 1062 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted", 1063 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING}; 1064 1065 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1066 XML_SetUserData(g_parser, &fault); 1067 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1068 "Bad encoding should 
not have been accepted"); 1069 } 1070 END_TEST 1071 1072 /* Try handing an invalid encoding to an external entity parser */ 1073 START_TEST(test_ext_entity_bad_encoding_2) { 1074 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1075 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1076 "<doc>&entity;</doc>"; 1077 ExtFaults fault 1078 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted", 1079 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING}; 1080 1081 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1082 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1083 XML_SetUserData(g_parser, &fault); 1084 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1085 "Bad encoding not faulted in external entity handler"); 1086 } 1087 END_TEST 1088 1089 /* Test that no error is reported for unknown entities if we don't 1090 read an external subset. This was fixed in Expat 1.95.5. 1091 */ 1092 START_TEST(test_wfc_undeclared_entity_unread_external_subset) { 1093 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 1094 "<doc>&entity;</doc>"; 1095 1096 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1097 == XML_STATUS_ERROR) 1098 xml_failure(g_parser); 1099 } 1100 END_TEST 1101 1102 /* Test that an error is reported for unknown entities if we don't 1103 have an external subset. 1104 */ 1105 START_TEST(test_wfc_undeclared_entity_no_external_subset) { 1106 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY, 1107 "Parser did not report undefined entity w/out a DTD."); 1108 } 1109 END_TEST 1110 1111 /* Test that an error is reported for unknown entities if we don't 1112 read an external subset, but have been declared standalone. 
1113 */ 1114 START_TEST(test_wfc_undeclared_entity_standalone) { 1115 const char *text 1116 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1117 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1118 "<doc>&entity;</doc>"; 1119 1120 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1121 "Parser did not report undefined entity (standalone)."); 1122 } 1123 END_TEST 1124 1125 /* Test that an error is reported for unknown entities if we have read 1126 an external subset, and standalone is true. 1127 */ 1128 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { 1129 const char *text 1130 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1131 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1132 "<doc>&entity;</doc>"; 1133 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1134 1135 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1136 XML_SetUserData(g_parser, &test_data); 1137 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1138 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1139 "Parser did not report undefined entity (external DTD)."); 1140 } 1141 END_TEST 1142 1143 /* Test that external entity handling is not done if the parsing flag 1144 * is set to UNLESS_STANDALONE 1145 */ 1146 START_TEST(test_entity_with_external_subset_unless_standalone) { 1147 const char *text 1148 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1149 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1150 "<doc>&entity;</doc>"; 1151 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL}; 1152 1153 XML_SetParamEntityParsing(g_parser, 1154 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1155 XML_SetUserData(g_parser, &test_data); 1156 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1157 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1158 "Parser did not report undefined entity"); 1159 } 1160 END_TEST 1161 1162 /* Test that no error is reported for unknown entities if we have read 1163 an external subset, and 
standalone is false. 1164 */ 1165 START_TEST(test_wfc_undeclared_entity_with_external_subset) { 1166 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1167 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1168 "<doc>&entity;</doc>"; 1169 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1170 1171 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1172 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1173 run_ext_character_check(text, &test_data, XCS("")); 1174 } 1175 END_TEST 1176 1177 /* Test that an error is reported if our NotStandalone handler fails */ 1178 START_TEST(test_not_standalone_handler_reject) { 1179 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1180 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1181 "<doc>&entity;</doc>"; 1182 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1183 1184 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1185 XML_SetUserData(g_parser, &test_data); 1186 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1187 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1188 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1189 "NotStandalone handler failed to reject"); 1190 1191 /* Try again but without external entity handling */ 1192 XML_ParserReset(g_parser, NULL); 1193 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1194 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1195 "NotStandalone handler failed to reject"); 1196 } 1197 END_TEST 1198 1199 /* Test that no error is reported if our NotStandalone handler succeeds */ 1200 START_TEST(test_not_standalone_handler_accept) { 1201 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1202 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1203 "<doc>&entity;</doc>"; 1204 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1205 1206 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1207 
XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1208 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1209 run_ext_character_check(text, &test_data, XCS("")); 1210 1211 /* Repeat without the external entity handler */ 1212 XML_ParserReset(g_parser, NULL); 1213 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1214 run_character_check(text, XCS("")); 1215 } 1216 END_TEST 1217 1218 START_TEST(test_entity_start_tag_level_greater_than_one) { 1219 const char *const text = "<!DOCTYPE t1 [\n" 1220 " <!ENTITY e1 'hello'>\n" 1221 "]>\n" 1222 "<t1>\n" 1223 " <t2>&e1;</t2>\n" 1224 "</t1>\n"; 1225 1226 XML_Parser parser = XML_ParserCreate(NULL); 1227 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 1228 /*isFinal*/ XML_TRUE) 1229 == XML_STATUS_OK); 1230 XML_ParserFree(parser); 1231 } 1232 END_TEST 1233 1234 START_TEST(test_wfc_no_recursive_entity_refs) { 1235 const char *text = "<!DOCTYPE doc [\n" 1236 " <!ENTITY entity '&entity;'>\n" 1237 "]>\n" 1238 "<doc>&entity;</doc>"; 1239 1240 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, 1241 "Parser did not report recursive entity reference."); 1242 } 1243 END_TEST 1244 1245 START_TEST(test_no_indirectly_recursive_entity_refs) { 1246 struct TestCase { 1247 const char *doc; 1248 bool usesParameterEntities; 1249 }; 1250 1251 const struct TestCase cases[] = { 1252 // general entity + character data 1253 {"<!DOCTYPE a [\n" 1254 " <!ENTITY e1 '&e2;'>\n" 1255 " <!ENTITY e2 '&e1;'>\n" 1256 "]><a>&e2;</a>\n", 1257 false}, 1258 1259 // general entity + attribute value 1260 {"<!DOCTYPE a [\n" 1261 " <!ENTITY e1 '&e2;'>\n" 1262 " <!ENTITY e2 '&e1;'>\n" 1263 "]><a k1='&e2;' />\n", 1264 false}, 1265 1266 // parameter entity 1267 {"<!DOCTYPE doc [\n" 1268 " <!ENTITY % p1 '%p2;'>\n" 1269 " <!ENTITY % p2 '%p1;'>\n" 1270 " <!ENTITY % define_g \"<!ENTITY g '%p2;'>\">\n" 1271 " %define_g;\n" 1272 "]>\n" 1273 "<doc/>\n", 1274 true}, 1275 }; 1276 const XML_Bool 
reset_or_not[] = {XML_TRUE, XML_FALSE}; 1277 1278 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1279 for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]); 1280 j++) { 1281 const XML_Bool reset_wanted = reset_or_not[j]; 1282 const char *const doc = cases[i].doc; 1283 const bool usesParameterEntities = cases[i].usesParameterEntities; 1284 1285 set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc); 1286 1287 #ifdef XML_DTD // both GE and DTD 1288 const bool rejection_expected = true; 1289 #elif XML_GE == 1 // GE but not DTD 1290 const bool rejection_expected = ! usesParameterEntities; 1291 #else // neither DTD nor GE 1292 const bool rejection_expected = false; 1293 #endif 1294 1295 XML_Parser parser = XML_ParserCreate(NULL); 1296 1297 #ifdef XML_DTD 1298 if (usesParameterEntities) { 1299 assert_true( 1300 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS) 1301 == 1); 1302 } 1303 #else 1304 UNUSED_P(usesParameterEntities); 1305 #endif // XML_DTD 1306 1307 const enum XML_Status status 1308 = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), 1309 /*isFinal*/ XML_TRUE); 1310 1311 if (rejection_expected) { 1312 assert_true(status == XML_STATUS_ERROR); 1313 assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF); 1314 } else { 1315 assert_true(status == XML_STATUS_OK); 1316 } 1317 1318 if (reset_wanted) { 1319 // This covers free'ing of (eventually) all three open entity lists by 1320 // XML_ParserReset. 1321 XML_ParserReset(parser, NULL); 1322 } 1323 1324 // This covers free'ing of (eventually) all three open entity lists by 1325 // XML_ParserFree (unless XML_ParserReset has already done that above). 
1326 XML_ParserFree(parser); 1327 } 1328 } 1329 } 1330 END_TEST 1331 1332 START_TEST(test_recursive_external_parameter_entity_2) { 1333 struct TestCase { 1334 const char *doc; 1335 enum XML_Status expectedStatus; 1336 }; 1337 1338 struct TestCase cases[] = { 1339 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR}, 1340 {"<!ENTITY % p1 '%p1;'>" 1341 "<!ENTITY % p1 'first declaration wins'>", 1342 XML_STATUS_ERROR}, 1343 {"<!ENTITY % p1 'first declaration wins'>" 1344 "<!ENTITY % p1 '%p1;'>", 1345 XML_STATUS_OK}, 1346 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK}, 1347 }; 1348 1349 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1350 const char *const doc = cases[i].doc; 1351 const enum XML_Status expectedStatus = cases[i].expectedStatus; 1352 set_subtest("%s", doc); 1353 1354 XML_Parser parser = XML_ParserCreate(NULL); 1355 assert_true(parser != NULL); 1356 1357 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 1358 assert_true(ext_parser != NULL); 1359 1360 const enum XML_Status actualStatus 1361 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); 1362 1363 assert_true(actualStatus == expectedStatus); 1364 if (actualStatus != XML_STATUS_OK) { 1365 assert_true(XML_GetErrorCode(ext_parser) 1366 == XML_ERROR_RECURSIVE_ENTITY_REF); 1367 } 1368 1369 XML_ParserFree(ext_parser); 1370 XML_ParserFree(parser); 1371 } 1372 } 1373 END_TEST 1374 1375 /* Test incomplete external entities are faulted */ 1376 START_TEST(test_ext_entity_invalid_parse) { 1377 const char *text = "<!DOCTYPE doc [\n" 1378 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1379 "]>\n" 1380 "<doc>&en;</doc>"; 1381 const ExtFaults faults[] 1382 = {{"<", "Incomplete element declaration not faulted", NULL, 1383 XML_ERROR_UNCLOSED_TOKEN}, 1384 {"<\xe2\x82", /* First two bytes of a three-byte char */ 1385 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 1386 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL, 1387 
XML_ERROR_PARTIAL_CHAR}, 1388 {NULL, NULL, NULL, XML_ERROR_NONE}}; 1389 const ExtFaults *fault = faults; 1390 1391 for (; fault->parse_text != NULL; fault++) { 1392 set_subtest("\"%s\"", fault->parse_text); 1393 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1394 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1395 XML_SetUserData(g_parser, (void *)fault); 1396 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1397 "Parser did not report external entity error"); 1398 XML_ParserReset(g_parser, NULL); 1399 } 1400 } 1401 END_TEST 1402 1403 /* Regression test for SF bug #483514. */ 1404 START_TEST(test_dtd_default_handling) { 1405 const char *text = "<!DOCTYPE doc [\n" 1406 "<!ENTITY e SYSTEM 'http://example.org/e'>\n" 1407 "<!NOTATION n SYSTEM 'http://example.org/n'>\n" 1408 "<!ELEMENT doc EMPTY>\n" 1409 "<!ATTLIST doc a CDATA #IMPLIED>\n" 1410 "<?pi in dtd?>\n" 1411 "<!--comment in dtd-->\n" 1412 "]><doc/>"; 1413 1414 XML_SetDefaultHandler(g_parser, accumulate_characters); 1415 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 1416 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 1417 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 1418 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 1419 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 1420 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 1421 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); 1422 XML_SetCommentHandler(g_parser, dummy_comment_handler); 1423 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1424 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1425 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>")); 1426 } 1427 END_TEST 1428 1429 /* Test handling of attribute declarations */ 1430 START_TEST(test_dtd_attr_handling) { 1431 const char *prolog = "<!DOCTYPE doc [\n" 1432 "<!ELEMENT doc EMPTY>\n"; 
1433 AttTest attr_data[] 1434 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n" 1435 "]>" 1436 "<doc a='two'/>", 1437 XCS("doc"), XCS("a"), 1438 XCS("(one|two|three)"), /* Extraneous spaces will be removed */ 1439 NULL, XML_TRUE}, 1440 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n" 1441 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n" 1442 "]>" 1443 "<doc/>", 1444 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE}, 1445 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n" 1446 "]>" 1447 "<doc/>", 1448 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE}, 1449 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n" 1450 "]>" 1451 "<doc/>", 1452 XCS("doc"), XCS("a"), XCS("CDATA"), 1453 #ifdef XML_UNICODE 1454 XCS("\x06f2"), 1455 #else 1456 XCS("\xdb\xb2"), 1457 #endif 1458 XML_FALSE}, 1459 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}}; 1460 AttTest *test; 1461 1462 for (test = attr_data; test->definition != NULL; test++) { 1463 set_subtest("%s", test->definition); 1464 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); 1465 XML_SetUserData(g_parser, test); 1466 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), 1467 XML_FALSE) 1468 == XML_STATUS_ERROR) 1469 xml_failure(g_parser); 1470 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition, 1471 (int)strlen(test->definition), XML_TRUE) 1472 == XML_STATUS_ERROR) 1473 xml_failure(g_parser); 1474 XML_ParserReset(g_parser, NULL); 1475 } 1476 } 1477 END_TEST 1478 1479 /* See related SF bug #673791. 1480 When namespace processing is enabled, setting the namespace URI for 1481 a prefix is not allowed; this test ensures that it *is* allowed 1482 when namespace processing is not enabled. 1483 (See Namespaces in XML, section 2.) 
1484 */ 1485 START_TEST(test_empty_ns_without_namespaces) { 1486 const char *text = "<doc xmlns:prefix='http://example.org/'>\n" 1487 " <e xmlns:prefix=''/>\n" 1488 "</doc>"; 1489 1490 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1491 == XML_STATUS_ERROR) 1492 xml_failure(g_parser); 1493 } 1494 END_TEST 1495 1496 /* Regression test for SF bug #824420. 1497 Checks that an xmlns:prefix attribute set in an attribute's default 1498 value isn't misinterpreted. 1499 */ 1500 START_TEST(test_ns_in_attribute_default_without_namespaces) { 1501 const char *text = "<!DOCTYPE e:element [\n" 1502 " <!ATTLIST e:element\n" 1503 " xmlns:e CDATA 'http://example.org/'>\n" 1504 " ]>\n" 1505 "<e:element/>"; 1506 1507 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1508 == XML_STATUS_ERROR) 1509 xml_failure(g_parser); 1510 } 1511 END_TEST 1512 1513 /* Regression test for SF bug #1515266: missing check of stopped 1514 parser in doContext() 'for' loop. */ 1515 START_TEST(test_stop_parser_between_char_data_calls) { 1516 /* The sample data must be big enough that there are two calls to 1517 the character data handler from within the inner "for" loop of 1518 the XML_TOK_DATA_CHARS case in doContent(), and the character 1519 handler must stop the parser and clear the character data 1520 handler. 1521 */ 1522 const char *text = long_character_data_text; 1523 1524 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1525 g_resumable = XML_FALSE; 1526 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1527 != XML_STATUS_ERROR) 1528 xml_failure(g_parser); 1529 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 1530 xml_failure(g_parser); 1531 } 1532 END_TEST 1533 1534 /* Regression test for SF bug #1515266: missing check of stopped 1535 parser in doContext() 'for' loop. 
*/ 1536 START_TEST(test_suspend_parser_between_char_data_calls) { 1537 /* The sample data must be big enough that there are two calls to 1538 the character data handler from within the inner "for" loop of 1539 the XML_TOK_DATA_CHARS case in doContent(), and the character 1540 handler must stop the parser and clear the character data 1541 handler. 1542 */ 1543 const char *text = long_character_data_text; 1544 1545 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1546 g_resumable = XML_TRUE; 1547 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1548 // we won't know exactly how much input we actually managed to give Expat. 1549 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 1550 != XML_STATUS_SUSPENDED) 1551 xml_failure(g_parser); 1552 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1553 xml_failure(g_parser); 1554 /* Try parsing directly */ 1555 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1556 != XML_STATUS_ERROR) 1557 fail("Attempt to continue parse while suspended not faulted"); 1558 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 1559 fail("Suspended parse not faulted with correct error"); 1560 } 1561 END_TEST 1562 1563 /* Test repeated calls to XML_StopParser are handled correctly */ 1564 START_TEST(test_repeated_stop_parser_between_char_data_calls) { 1565 const char *text = long_character_data_text; 1566 1567 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1568 g_resumable = XML_FALSE; 1569 g_abortable = XML_FALSE; 1570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1571 != XML_STATUS_ERROR) 1572 fail("Failed to double-stop parser"); 1573 1574 XML_ParserReset(g_parser, NULL); 1575 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1576 g_resumable = XML_TRUE; 1577 g_abortable = XML_FALSE; 1578 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1579 // we won't know 
exactly how much input we actually managed to give Expat. 1580 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 1581 != XML_STATUS_SUSPENDED) 1582 fail("Failed to double-suspend parser"); 1583 1584 XML_ParserReset(g_parser, NULL); 1585 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1586 g_resumable = XML_TRUE; 1587 g_abortable = XML_TRUE; 1588 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1589 != XML_STATUS_ERROR) 1590 fail("Failed to suspend-abort parser"); 1591 } 1592 END_TEST 1593 1594 START_TEST(test_good_cdata_ascii) { 1595 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>"; 1596 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>"); 1597 1598 CharData storage; 1599 CharData_Init(&storage); 1600 XML_SetUserData(g_parser, &storage); 1601 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1602 /* Add start and end handlers for coverage */ 1603 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1604 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1605 1606 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1607 == XML_STATUS_ERROR) 1608 xml_failure(g_parser); 1609 CharData_CheckXMLChars(&storage, expected); 1610 1611 /* Try again, this time with a default handler */ 1612 XML_ParserReset(g_parser, NULL); 1613 CharData_Init(&storage); 1614 XML_SetUserData(g_parser, &storage); 1615 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1616 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1617 1618 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1619 == XML_STATUS_ERROR) 1620 xml_failure(g_parser); 1621 CharData_CheckXMLChars(&storage, expected); 1622 } 1623 END_TEST 1624 1625 START_TEST(test_good_cdata_utf16) { 1626 /* Test data is: 1627 * <?xml version='1.0' encoding='utf-16'?> 1628 * <a><![CDATA[hello]]></a> 1629 */ 1630 const char text[] 1631 = 
"\0<\0?\0x\0m\0l\0" 1632 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1633 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1634 "1\0" 1635 "6\0'" 1636 "\0?\0>\0\n" 1637 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; 1638 const XML_Char *expected = XCS("hello"); 1639 1640 CharData storage; 1641 CharData_Init(&storage); 1642 XML_SetUserData(g_parser, &storage); 1643 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1644 1645 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1646 == XML_STATUS_ERROR) 1647 xml_failure(g_parser); 1648 CharData_CheckXMLChars(&storage, expected); 1649 } 1650 END_TEST 1651 1652 START_TEST(test_good_cdata_utf16_le) { 1653 /* Test data is: 1654 * <?xml version='1.0' encoding='utf-16'?> 1655 * <a><![CDATA[hello]]></a> 1656 */ 1657 const char text[] 1658 = "<\0?\0x\0m\0l\0" 1659 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1660 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1661 "1\0" 1662 "6\0'" 1663 "\0?\0>\0\n" 1664 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; 1665 const XML_Char *expected = XCS("hello"); 1666 1667 CharData storage; 1668 CharData_Init(&storage); 1669 XML_SetUserData(g_parser, &storage); 1670 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1671 1672 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1673 == XML_STATUS_ERROR) 1674 xml_failure(g_parser); 1675 CharData_CheckXMLChars(&storage, expected); 1676 } 1677 END_TEST 1678 1679 /* Test UTF16 conversion of a long cdata string */ 1680 1681 /* 16 characters: handy macro to reduce visual clutter */ 1682 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" 1683 1684 START_TEST(test_long_cdata_utf16) { 1685 /* Test data is: 1686 * <?xlm version='1.0' encoding='utf-16'?> 1687 * <a><![CDATA[ 1688 * ABCDEFGHIJKLMNOP 1689 * ]]></a> 1690 */ 1691 const char text[] 1692 = "\0<\0?\0x\0m\0l\0 " 1693 
"\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " 1694 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" 1695 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1696 /* 64 characters per line */ 1697 /* clang-format off */ 1698 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1699 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1700 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1701 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1702 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1703 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1704 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1705 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1706 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1707 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1708 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1709 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1710 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1711 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1712 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1713 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1714 A_TO_P_IN_UTF16 1715 /* clang-format on */ 1716 "\0]\0]\0>\0<\0/\0a\0>"; 1717 const XML_Char *expected = 1718 /* clang-format off */ 1719 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1720 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1721 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1722 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1723 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1724 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1725 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1726 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1727 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1728 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1729 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1730 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1731 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1732 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1733 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1734 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1735 XCS("ABCDEFGHIJKLMNOP"); 1736 /* clang-format on */ 1737 CharData storage; 1738 void *buffer; 1739 1740 CharData_Init(&storage); 1741 XML_SetUserData(g_parser, &storage); 1742 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1743 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1); 1744 if (buffer == NULL) 1745 fail("Could not allocate parse buffer"); 1746 assert(buffer != NULL); 1747 memcpy(buffer, text, sizeof(text) - 1); 1748 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) 1749 xml_failure(g_parser); 1750 CharData_CheckXMLChars(&storage, expected); 1751 } 1752 END_TEST 1753 1754 /* Test handling of multiple unit UTF-16 characters */ 1755 START_TEST(test_multichar_cdata_utf16) { 1756 /* Test data is: 1757 * <?xml version='1.0' encoding='utf-16'?> 1758 * <a><![CDATA[{MINIM}{CROTCHET}]]></a> 1759 * 1760 * where {MINIM} is U+1d15e (a minim or half-note) 1761 * UTF-16: 0xd834 0xdd5e 1762 * UTF-8: 0xf0 0x9d 0x85 0x9e 1763 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) 1764 * UTF-16: 0xd834 0xdd5f 1765 * UTF-8: 0xf0 0x9d 0x85 0x9f 1766 */ 1767 const char text[] = "\0<\0?\0x\0m\0l\0" 1768 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1769 " 
\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1770 "1\0" 1771 "6\0'" 1772 "\0?\0>\0\n" 1773 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1774 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" 1775 "\0]\0]\0>\0<\0/\0a\0>"; 1776 #ifdef XML_UNICODE 1777 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); 1778 #else 1779 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); 1780 #endif 1781 CharData storage; 1782 1783 CharData_Init(&storage); 1784 XML_SetUserData(g_parser, &storage); 1785 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1786 1787 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1788 == XML_STATUS_ERROR) 1789 xml_failure(g_parser); 1790 CharData_CheckXMLChars(&storage, expected); 1791 } 1792 END_TEST 1793 1794 /* Test that an element name with a UTF-16 surrogate pair is rejected */ 1795 START_TEST(test_utf16_bad_surrogate_pair) { 1796 /* Test data is: 1797 * <?xml version='1.0' encoding='utf-16'?> 1798 * <a><![CDATA[{BADLINB}]]></a> 1799 * 1800 * where {BADLINB} is U+10000 (the first Linear B character) 1801 * with the UTF-16 surrogate pair in the wrong order, i.e. 
1802 * 0xdc00 0xd800 1803 */ 1804 const char text[] = "\0<\0?\0x\0m\0l\0" 1805 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1806 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1807 "1\0" 1808 "6\0'" 1809 "\0?\0>\0\n" 1810 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1811 "\xdc\x00\xd8\x00" 1812 "\0]\0]\0>\0<\0/\0a\0>"; 1813 1814 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1815 != XML_STATUS_ERROR) 1816 fail("Reversed UTF-16 surrogate pair not faulted"); 1817 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 1818 xml_failure(g_parser); 1819 } 1820 END_TEST 1821 1822 START_TEST(test_bad_cdata) { 1823 struct CaseData { 1824 const char *text; 1825 enum XML_Error expectedError; 1826 }; 1827 1828 struct CaseData cases[] 1829 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN}, 1830 {"<a><!", XML_ERROR_UNCLOSED_TOKEN}, 1831 {"<a><![", XML_ERROR_UNCLOSED_TOKEN}, 1832 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN}, 1833 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN}, 1834 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN}, 1835 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN}, 1836 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN}, 1837 1838 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1839 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1840 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1841 1842 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN}, 1843 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1844 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! 
*/ 1845 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN}, 1846 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN}, 1847 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN}, 1848 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN}, 1849 1850 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1851 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1852 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1853 1854 size_t i = 0; 1855 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1856 set_subtest("%s", cases[i].text); 1857 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( 1858 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); 1859 const enum XML_Error actualError = XML_GetErrorCode(g_parser); 1860 1861 assert(actualStatus == XML_STATUS_ERROR); 1862 1863 if (actualError != cases[i].expectedError) { 1864 char message[100]; 1865 snprintf(message, sizeof(message), 1866 "Expected error %d but got error %d for case %u: \"%s\"\n", 1867 cases[i].expectedError, actualError, (unsigned int)i + 1, 1868 cases[i].text); 1869 fail(message); 1870 } 1871 1872 XML_ParserReset(g_parser, NULL); 1873 } 1874 } 1875 END_TEST 1876 1877 /* Test failures in UTF-16 CDATA */ 1878 START_TEST(test_bad_cdata_utf16) { 1879 struct CaseData { 1880 size_t text_bytes; 1881 const char *text; 1882 enum XML_Error expected_error; 1883 }; 1884 1885 const char prolog[] = "\0<\0?\0x\0m\0l\0" 1886 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1887 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1888 "1\0" 1889 "6\0'" 1890 "\0?\0>\0\n" 1891 "\0<\0a\0>"; 1892 struct CaseData cases[] = { 1893 {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, 1894 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, 1895 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, 1896 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, 1897 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, 1898 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, 1899 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, 1900 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, 1901 {9, "\0<\0!\0[\0C\0", 
XML_ERROR_UNCLOSED_TOKEN}, 1902 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, 1903 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, 1904 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, 1905 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1906 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, 1907 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, 1908 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, 1909 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1910 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1911 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1912 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1913 /* Now add a four-byte UTF-16 character */ 1914 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", 1915 XML_ERROR_UNCLOSED_CDATA_SECTION}, 1916 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR}, 1917 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", 1918 XML_ERROR_PARTIAL_CHAR}, 1919 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", 1920 XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1921 size_t i; 1922 1923 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1924 set_subtest("case %lu", (long unsigned)(i + 1)); 1925 enum XML_Status actual_status; 1926 enum XML_Error actual_error; 1927 1928 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1, 1929 XML_FALSE) 1930 == XML_STATUS_ERROR) 1931 xml_failure(g_parser); 1932 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text, 1933 (int)cases[i].text_bytes, XML_TRUE); 1934 assert(actual_status == XML_STATUS_ERROR); 1935 actual_error = XML_GetErrorCode(g_parser); 1936 if (actual_error != cases[i].expected_error) { 1937 char message[1024]; 1938 1939 snprintf(message, sizeof(message), 1940 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR 1941 ") for case %lu\n", 1942 cases[i].expected_error, 1943 
XML_ErrorString(cases[i].expected_error), actual_error, 1944 XML_ErrorString(actual_error), (long unsigned)(i + 1)); 1945 fail(message); 1946 } 1947 XML_ParserReset(g_parser, NULL); 1948 } 1949 } 1950 END_TEST 1951 1952 /* Test stopping the parser in cdata handler */ 1953 START_TEST(test_stop_parser_between_cdata_calls) { 1954 const char *text = long_cdata_text; 1955 1956 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1957 g_resumable = XML_FALSE; 1958 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler"); 1959 } 1960 END_TEST 1961 1962 /* Test suspending the parser in cdata handler */ 1963 START_TEST(test_suspend_parser_between_cdata_calls) { 1964 if (g_chunkSize != 0) { 1965 // this test does not use SINGLE_BYTES, because of suspension 1966 return; 1967 } 1968 1969 const char *text = long_cdata_text; 1970 enum XML_Status result; 1971 1972 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1973 g_resumable = XML_TRUE; 1974 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1975 // we won't know exactly how much input we actually managed to give Expat. 1976 result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE); 1977 if (result != XML_STATUS_SUSPENDED) { 1978 if (result == XML_STATUS_ERROR) 1979 xml_failure(g_parser); 1980 fail("Parse not suspended in CDATA handler"); 1981 } 1982 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1983 xml_failure(g_parser); 1984 } 1985 END_TEST 1986 1987 /* Test memory allocation functions */ 1988 START_TEST(test_memory_allocation) { 1989 char *buffer = (char *)XML_MemMalloc(g_parser, 256); 1990 char *p; 1991 1992 if (buffer == NULL) { 1993 fail("Allocation failed"); 1994 } else { 1995 /* Try writing to memory; some OSes try to cheat! 
*/ 1996 buffer[0] = 'T'; 1997 buffer[1] = 'E'; 1998 buffer[2] = 'S'; 1999 buffer[3] = 'T'; 2000 buffer[4] = '\0'; 2001 if (strcmp(buffer, "TEST") != 0) { 2002 fail("Memory not writable"); 2003 } else { 2004 p = (char *)XML_MemRealloc(g_parser, buffer, 512); 2005 if (p == NULL) { 2006 fail("Reallocation failed"); 2007 } else { 2008 /* Write again, just to be sure */ 2009 buffer = p; 2010 buffer[0] = 'V'; 2011 if (strcmp(buffer, "VEST") != 0) { 2012 fail("Reallocated memory not writable"); 2013 } 2014 } 2015 } 2016 XML_MemFree(g_parser, buffer); 2017 } 2018 } 2019 END_TEST 2020 2021 /* Test XML_DefaultCurrent() passes handling on correctly */ 2022 START_TEST(test_default_current) { 2023 const char *text = "<doc>hell]</doc>"; 2024 const char *entity_text = "<!DOCTYPE doc [\n" 2025 "<!ENTITY entity '%'>\n" 2026 "]>\n" 2027 "<doc>&entity;</doc>"; 2028 2029 set_subtest("with defaulting"); 2030 { 2031 struct handler_record_list storage; 2032 storage.count = 0; 2033 XML_SetDefaultHandler(g_parser, record_default_handler); 2034 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2035 XML_SetUserData(g_parser, &storage); 2036 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2037 == XML_STATUS_ERROR) 2038 xml_failure(g_parser); 2039 int i = 0; 2040 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 2041 // we should have gotten one or more cdata callbacks, totaling 5 chars 2042 int cdata_len_remaining = 5; 2043 while (cdata_len_remaining > 0) { 2044 const struct handler_record_entry *c_entry 2045 = handler_record_get(&storage, i++); 2046 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0); 2047 assert_true(c_entry->arg > 0); 2048 assert_true(c_entry->arg <= cdata_len_remaining); 2049 cdata_len_remaining -= c_entry->arg; 2050 // default handler must follow, with the exact same len argument. 
2051 assert_record_handler_called(&storage, i++, "record_default_handler", 2052 c_entry->arg); 2053 } 2054 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 2055 assert_true(storage.count == i); 2056 } 2057 2058 /* Again, without the defaulting */ 2059 set_subtest("no defaulting"); 2060 { 2061 struct handler_record_list storage; 2062 storage.count = 0; 2063 XML_ParserReset(g_parser, NULL); 2064 XML_SetDefaultHandler(g_parser, record_default_handler); 2065 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2066 XML_SetUserData(g_parser, &storage); 2067 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2068 == XML_STATUS_ERROR) 2069 xml_failure(g_parser); 2070 int i = 0; 2071 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 2072 // we should have gotten one or more cdata callbacks, totaling 5 chars 2073 int cdata_len_remaining = 5; 2074 while (cdata_len_remaining > 0) { 2075 const struct handler_record_entry *c_entry 2076 = handler_record_get(&storage, i++); 2077 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); 2078 assert_true(c_entry->arg > 0); 2079 assert_true(c_entry->arg <= cdata_len_remaining); 2080 cdata_len_remaining -= c_entry->arg; 2081 } 2082 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 2083 assert_true(storage.count == i); 2084 } 2085 2086 /* Now with an internal entity to complicate matters */ 2087 set_subtest("with internal entity"); 2088 { 2089 struct handler_record_list storage; 2090 storage.count = 0; 2091 XML_ParserReset(g_parser, NULL); 2092 XML_SetDefaultHandler(g_parser, record_default_handler); 2093 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2094 XML_SetUserData(g_parser, &storage); 2095 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2096 XML_TRUE) 2097 == XML_STATUS_ERROR) 2098 xml_failure(g_parser); 2099 /* The default handler suppresses the entity */ 
2100 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2101 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2102 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2103 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2104 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2105 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2106 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2107 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2108 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2109 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2110 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2111 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2112 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2113 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2114 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2115 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2116 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2117 assert_record_handler_called(&storage, 17, "record_default_handler", 8); 2118 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2119 assert_true(storage.count == 19); 2120 } 2121 2122 /* Again, with a skip handler */ 2123 set_subtest("with skip handler"); 2124 { 2125 struct handler_record_list storage; 2126 storage.count = 0; 2127 XML_ParserReset(g_parser, NULL); 2128 XML_SetDefaultHandler(g_parser, record_default_handler); 2129 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2130 XML_SetSkippedEntityHandler(g_parser, record_skip_handler); 2131 XML_SetUserData(g_parser, &storage); 2132 if (_XML_Parse_SINGLE_BYTES(g_parser, 
entity_text, (int)strlen(entity_text), 2133 XML_TRUE) 2134 == XML_STATUS_ERROR) 2135 xml_failure(g_parser); 2136 /* The default handler suppresses the entity */ 2137 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2138 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2139 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2140 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2141 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2142 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2143 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2144 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2145 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2146 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2147 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2148 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2149 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2150 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2151 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2152 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2153 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2154 assert_record_handler_called(&storage, 17, "record_skip_handler", 0); 2155 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2156 assert_true(storage.count == 19); 2157 } 2158 2159 /* This time, allow the entity through */ 2160 set_subtest("allow entity"); 2161 { 2162 struct handler_record_list storage; 2163 storage.count = 0; 2164 XML_ParserReset(g_parser, NULL); 2165 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2166 XML_SetCharacterDataHandler(g_parser, 
record_cdata_handler); 2167 XML_SetUserData(g_parser, &storage); 2168 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2169 XML_TRUE) 2170 == XML_STATUS_ERROR) 2171 xml_failure(g_parser); 2172 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2173 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2174 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2175 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2176 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2177 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2178 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2179 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2180 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2181 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2182 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2183 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2184 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2185 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2186 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2187 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2188 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2189 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); 2190 assert_record_handler_called(&storage, 18, "record_default_handler", 1); 2191 assert_record_handler_called(&storage, 19, "record_default_handler", 6); 2192 assert_true(storage.count == 20); 2193 } 2194 2195 /* Finally, without passing the cdata to the default handler */ 2196 set_subtest("not passing cdata"); 2197 { 2198 struct handler_record_list storage; 2199 storage.count = 0; 2200 
XML_ParserReset(g_parser, NULL); 2201 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2202 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2203 XML_SetUserData(g_parser, &storage); 2204 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2205 XML_TRUE) 2206 == XML_STATUS_ERROR) 2207 xml_failure(g_parser); 2208 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2209 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2210 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2211 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2212 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2213 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2214 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2215 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2216 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2217 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2218 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2219 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2220 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2221 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2222 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2223 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2224 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2225 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", 2226 1); 2227 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2228 assert_true(storage.count == 19); 2229 } 2230 } 2231 END_TEST 2232 2233 /* Test DTD element parsing code paths */ 2234 
START_TEST(test_dtd_elements) { 2235 const char *text = "<!DOCTYPE doc [\n" 2236 "<!ELEMENT doc (chapter)>\n" 2237 "<!ELEMENT chapter (#PCDATA)>\n" 2238 "]>\n" 2239 "<doc><chapter>Wombats are go</chapter></doc>"; 2240 2241 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 2242 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2243 == XML_STATUS_ERROR) 2244 xml_failure(g_parser); 2245 } 2246 END_TEST 2247 2248 static void XMLCALL 2249 element_decl_check_model(void *userData, const XML_Char *name, 2250 XML_Content *model) { 2251 UNUSED_P(userData); 2252 uint32_t errorFlags = 0; 2253 2254 /* Expected model array structure is this: 2255 * [0] (type 6, quant 0) 2256 * [1] (type 5, quant 0) 2257 * [3] (type 4, quant 0, name "bar") 2258 * [4] (type 4, quant 0, name "foo") 2259 * [5] (type 4, quant 3, name "xyz") 2260 * [2] (type 4, quant 2, name "zebra") 2261 */ 2262 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); 2263 errorFlags |= ((model != NULL) ? 0 : (1u << 1)); 2264 2265 if (model != NULL) { 2266 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); 2267 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); 2268 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); 2269 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); 2270 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); 2271 2272 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); 2273 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); 2274 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); 2275 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); 2276 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); 2277 2278 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); 2279 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); 2280 errorFlags |= ((model[2].numchildren == 0) ? 
0 : (1u << 14)); 2281 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); 2282 errorFlags 2283 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); 2284 2285 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); 2286 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); 2287 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); 2288 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); 2289 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); 2290 2291 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); 2292 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); 2293 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); 2294 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); 2295 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); 2296 2297 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); 2298 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); 2299 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); 2300 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); 2301 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 
0 : (1u << 31)); 2302 } 2303 2304 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); 2305 XML_FreeContentModel(g_parser, model); 2306 } 2307 2308 START_TEST(test_dtd_elements_nesting) { 2309 // Payload inspired by a test in Perl's XML::Parser 2310 const char *text = "<!DOCTYPE foo [\n" 2311 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n" 2312 "]>\n" 2313 "<foo/>"; 2314 2315 XML_SetUserData(g_parser, (void *)(uintptr_t)-1); 2316 2317 XML_SetElementDeclHandler(g_parser, element_decl_check_model); 2318 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2319 == XML_STATUS_ERROR) 2320 xml_failure(g_parser); 2321 2322 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) 2323 fail("Element declaration model regression detected"); 2324 } 2325 END_TEST 2326 2327 /* Test foreign DTD handling */ 2328 START_TEST(test_set_foreign_dtd) { 2329 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"; 2330 const char *text2 = "<doc>&entity;</doc>"; 2331 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2332 2333 /* Check hash salt is passed through too */ 2334 XML_SetHashSalt(g_parser, 0x12345678); 2335 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2336 XML_SetUserData(g_parser, &test_data); 2337 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2338 /* Add a default handler to exercise more code paths */ 2339 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2340 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2341 fail("Could not set foreign DTD"); 2342 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2343 == XML_STATUS_ERROR) 2344 xml_failure(g_parser); 2345 2346 /* Ensure that trying to set the DTD after parsing has started 2347 * is faulted, even if it's the same setting. 
2348 */ 2349 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2350 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2351 fail("Failed to reject late foreign DTD setting"); 2352 /* Ditto for the hash salt */ 2353 if (XML_SetHashSalt(g_parser, 0x23456789)) 2354 fail("Failed to reject late hash salt change"); 2355 2356 /* Now finish the parse */ 2357 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2358 == XML_STATUS_ERROR) 2359 xml_failure(g_parser); 2360 } 2361 END_TEST 2362 2363 /* Test foreign DTD handling with a failing NotStandalone handler */ 2364 START_TEST(test_foreign_dtd_not_standalone) { 2365 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2366 "<doc>&entity;</doc>"; 2367 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2368 2369 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2370 XML_SetUserData(g_parser, &test_data); 2371 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2372 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 2373 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2374 fail("Could not set foreign DTD"); 2375 expect_failure(text, XML_ERROR_NOT_STANDALONE, 2376 "NotStandalonehandler failed to reject"); 2377 } 2378 END_TEST 2379 2380 /* Test invalid character in a foreign DTD is faulted */ 2381 START_TEST(test_invalid_foreign_dtd) { 2382 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2383 "<doc>&entity;</doc>"; 2384 ExtFaults test_data 2385 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN}; 2386 2387 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2388 XML_SetUserData(g_parser, &test_data); 2389 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 2390 XML_UseForeignDTD(g_parser, XML_TRUE); 2391 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2392 "Bad DTD should not have been accepted"); 2393 } 2394 END_TEST 2395 2396 /* Test foreign DTD use with a 
doctype */
START_TEST(test_foreign_dtd_with_doctype) {
  /* text1 carries its own DOCTYPE with an internal subset that declares
   * the entity referenced from text2.
   */
  const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
                      "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
  const char *text2 = "<doc>&entity;</doc>";
  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};

  /* Check hash salt is passed through too */
  XML_SetHashSalt(g_parser, 0x12345678);
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetUserData(g_parser, &test_data);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  /* Add a default handler to exercise more code paths */
  XML_SetDefaultHandler(g_parser, dummy_default_handler);
  if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
    fail("Could not set foreign DTD");
  if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);

  /* Ensure that trying to set the DTD after parsing has started
   * is faulted, even if it's the same setting.
   */
  if (XML_UseForeignDTD(g_parser, XML_TRUE)
      != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
    fail("Failed to reject late foreign DTD setting");
  /* Ditto for the hash salt */
  if (XML_SetHashSalt(g_parser, 0x23456789))
    fail("Failed to reject late hash salt change");

  /* Now finish the parse */
  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
}
END_TEST

/* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset) {
  const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
                     "<doc>&foo;</doc>";

  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetUserData(g_parser, NULL);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
  /* NOTE(review): the result is not checked here, unlike in
   * test_foreign_dtd_with_doctype above - confirm that is intended.
   */
  XML_UseForeignDTD(g_parser, XML_TRUE);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
}
END_TEST

/* Test that an entity reference is reported as undefined when a foreign
 * DTD is requested but the document has no DOCTYPE declaring the entity
 */
START_TEST(test_empty_foreign_dtd) {
  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
                     "<doc>&entity;</doc>";

  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
  /* NOTE(review): result not checked, see the note in
   * test_foreign_dtd_without_external_subset.
   */
  XML_UseForeignDTD(g_parser, XML_TRUE);
  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
                 "Undefined entity not faulted");
}
END_TEST

/* Test XML Base is set and unset appropriately */
START_TEST(test_set_base) {
  const XML_Char *old_base;
  const XML_Char *new_base = XCS("/local/file/name.xml");

  /* Remember the current base so that it can be put back at the end */
  old_base = XML_GetBase(g_parser);
  if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
    fail("Unable to set base");
  /* Compared by content, not by pointer */
  if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
    fail("Base setting not correct");
  if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
    fail("Unable to NULL base");
  if (XML_GetBase(g_parser) != NULL)
    fail("Base setting not nulled");
  XML_SetBase(g_parser, old_base);
}
END_TEST

/* Test attribute counts, indexing, etc */
START_TEST(test_attributes) {
  const char *text = "<!DOCTYPE doc [\n"
                     "<!ELEMENT doc (tag)>\n"
                     "<!ATTLIST doc id ID #REQUIRED>\n"
                     "]>"
                     "<doc a='1' id='one' b='2'>"
                     "<tag c='3'/>"
                     "</doc>";
  /* Expected name/value pairs per element; each list ends with a
   * {NULL, NULL} entry
   */
  AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
                         {XCS("b"), XCS("2")},
                         {XCS("id"), XCS("one")},
                         {NULL, NULL}};
  AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
  /* NOTE(review): the two integers presumably are the expected counts of
   * specified and defaulted attributes, and the following string the
   * name of the ID attribute - confirm against the ElementInfo
   * declaration.
   */
  ElementInfo info[] = {{XCS("doc"), 3, 0, XCS("id"), doc_info},
                        {XCS("tag"), 1, 0, NULL, tag_info},
                        {NULL, 0, 0, NULL, NULL}};

  /* This test uses a parser of its own rather than g_parser */
  XML_Parser parser = XML_ParserCreate(NULL);
  assert_true(parser != NULL);
  ParserAndElementInfo parserAndElementInfos = {
      parser,
      info,
  };

  XML_SetStartElementHandler(parser, counting_start_element_handler);
  XML_SetUserData(parser, &parserAndElementInfos);
  if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(parser);

  XML_ParserFree(parser);
}
END_TEST

/* Test that the first of two CDATA declarations of the same attribute
 * provides the default value
 */
START_TEST(test_duplicate_cdata_attribute) {
  /*
    https://www.w3.org/TR/xml/#attdecls

    Test the following statement from the linked specification:
    When more than one definition is provided for the same attribute of a given
    element type, the first declaration is binding and later declarations are
    ignored.
  */

  const char *text
      = "<!DOCTYPE doc [\n"
        " <!ATTLIST doc attribute CDATA 'expected' attribute CDATA 'ignored'>\n"
        "]>\n"
        "<doc/>\n";
  /* <doc/> carries no attributes itself; the one expected attribute has
   * the value of the first declaration
   */
  AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
  ElementInfo info[]
      = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};

  /* This test uses a parser of its own rather than g_parser */
  XML_Parser parser = XML_ParserCreate(NULL);
  assert_true(parser != NULL);

  ParserAndElementInfo parserAndElementInfos = {
      parser,
      info,
  };

  XML_SetStartElementHandler(parser, counting_start_element_handler);
  XML_SetUserData(parser, &parserAndElementInfos);

  if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
      != XML_STATUS_OK)
    xml_failure(parser);

  XML_ParserFree(parser);
}
END_TEST

/* Test that a CDATA declaration with a default value wins over a later
 * ID declaration of the same attribute
 */
START_TEST(test_duplicate_id_attribute_1) {
  /*
    https://www.w3.org/TR/xml/#attdecls

    Test the following statement from the linked specification:
    When more than one definition is provided for the same attribute of a given
    element type, the first declaration is binding and later declarations are
    ignored.
  */

  const char *text
      = "<!DOCTYPE doc [\n"
        " <!ATTLIST doc identifier CDATA 'expected' identifier ID #REQUIRED>\n"
        "]>\n"
        "<doc/>\n";
  /* The expected ID attribute name is NULL: the later ID declaration
   * must have been ignored
   */
  AttrInfo doc_info[] = {{XCS("identifier"), XCS("expected")}, {NULL, NULL}};
  ElementInfo info[]
      = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};

  /* This test uses a parser of its own rather than g_parser */
  XML_Parser parser = XML_ParserCreate(NULL);
  assert_true(parser != NULL);

  ParserAndElementInfo parserAndElementInfos = {
      parser,
      info,
  };

  XML_SetStartElementHandler(parser, counting_start_element_handler);
  XML_SetUserData(parser, &parserAndElementInfos);

  if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
      != XML_STATUS_OK)
    xml_failure(parser);

  XML_ParserFree(parser);
}
END_TEST

START_TEST(test_duplicate_id_attribute_2) {
  /*
    https://www.w3.org/TR/xml/#attdecls

    Test the following statement from the linked specification:
    When more than one definition is provided for the same attribute of a given
    element type, the first declaration is binding and later declarations are
    ignored.
2597 */ 2598 2599 const char *text 2600 = "<!DOCTYPE doc [\n" 2601 " <!ATTLIST doc identifier ID #REQUIRED identifier CDATA 'unexpected'>\n" 2602 "]>\n" 2603 "<doc/>\n"; 2604 AttrInfo doc_info[] = {{NULL, NULL}}; 2605 2606 ElementInfo info[] 2607 = {{XCS("doc"), 0, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}}; 2608 2609 XML_Parser parser = XML_ParserCreate(NULL); 2610 assert_true(parser != NULL); 2611 2612 ParserAndElementInfo parserAndElementInfos = { 2613 parser, 2614 info, 2615 }; 2616 2617 XML_SetStartElementHandler(parser, counting_start_element_handler); 2618 XML_SetUserData(parser, &parserAndElementInfos); 2619 2620 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2621 != XML_STATUS_OK) 2622 xml_failure(parser); 2623 2624 XML_ParserFree(parser); 2625 } 2626 END_TEST 2627 2628 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl) { 2629 /* 2630 https://www.w3.org/TR/xml/#attdecls 2631 2632 Test the following statement from the linked specification: 2633 When more than one AttlistDecl is provided for a given element type, 2634 the contents of all those provided are merged. 
2635 */ 2636 const char *text = "<!DOCTYPE doc [\n" 2637 " <!ATTLIST doc attribute CDATA 'expected'>\n" 2638 " <!ATTLIST doc attribute CDATA 'ignored'>\n" 2639 "]>\n" 2640 "<doc/>\n"; 2641 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}}; 2642 ElementInfo info[] 2643 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}}; 2644 2645 XML_Parser parser = XML_ParserCreate(NULL); 2646 assert_true(parser != NULL); 2647 2648 ParserAndElementInfo parserAndElementInfos = { 2649 parser, 2650 info, 2651 }; 2652 2653 XML_SetStartElementHandler(parser, counting_start_element_handler); 2654 XML_SetUserData(parser, &parserAndElementInfos); 2655 2656 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2657 != XML_STATUS_OK) 2658 xml_failure(parser); 2659 2660 XML_ParserFree(parser); 2661 } 2662 END_TEST 2663 2664 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2) { 2665 /* 2666 https://www.w3.org/TR/xml/#attdecls 2667 2668 Test the following statement from the linked specification: 2669 When more than one AttlistDecl is provided for a given element type, 2670 the contents of all those provided are merged. 
2671 */ 2672 const char *text = "<!DOCTYPE doc [\n" 2673 " <!ATTLIST doc attribute CDATA 'expected_doc'>\n" 2674 " <!ATTLIST tag attribute CDATA 'expected_tag'>\n" 2675 " <!ATTLIST doc attribute CDATA 'ignored_doc'>\n" 2676 "]>\n" 2677 "<doc><tag></tag></doc>\n"; 2678 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")}, {NULL, NULL}}; 2679 AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}}; 2680 ElementInfo info[] = {{XCS("doc"), 0, 1, NULL, doc_info}, 2681 {XCS("tag"), 0, 1, NULL, tag_info}, 2682 {NULL, 0, 0, NULL, NULL}}; 2683 2684 XML_Parser parser = XML_ParserCreate(NULL); 2685 assert_true(parser != NULL); 2686 2687 ParserAndElementInfo parserAndElementInfos = { 2688 parser, 2689 info, 2690 }; 2691 2692 XML_SetStartElementHandler(parser, counting_start_element_handler); 2693 XML_SetUserData(parser, &parserAndElementInfos); 2694 2695 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2696 != XML_STATUS_OK) 2697 xml_failure(parser); 2698 2699 XML_ParserFree(parser); 2700 } 2701 END_TEST 2702 2703 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3) { 2704 /* 2705 https://www.w3.org/TR/xml/#attdecls 2706 2707 Test the following statement from the linked specification: 2708 When more than one AttlistDecl is provided for a given element type, 2709 the contents of all those provided are merged. 
2710 */ 2711 const char *text 2712 = "<!DOCTYPE doc [\n" 2713 " <!ATTLIST doc attribute CDATA 'expected_doc'>\n" 2714 " <!ATTLIST tag attribute CDATA 'expected_tag'>\n" 2715 " <!ATTLIST doc second_attribute CDATA 'second_expected_doc' attribute CDATA 'ignored_doc'>\n" 2716 "]>\n" 2717 "<doc><tag></tag></doc>\n"; 2718 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")}, 2719 {XCS("second_attribute"), XCS("second_expected_doc")}, 2720 {NULL, NULL}}; 2721 AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}}; 2722 ElementInfo info[] = {{XCS("doc"), 0, 2, NULL, doc_info}, 2723 {XCS("tag"), 0, 1, NULL, tag_info}, 2724 {NULL, 0, 0, NULL, NULL}}; 2725 2726 XML_Parser parser = XML_ParserCreate(NULL); 2727 assert_true(parser != NULL); 2728 2729 ParserAndElementInfo parserAndElementInfos = { 2730 parser, 2731 info, 2732 }; 2733 2734 XML_SetStartElementHandler(parser, counting_start_element_handler); 2735 XML_SetUserData(parser, &parserAndElementInfos); 2736 2737 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2738 != XML_STATUS_OK) 2739 xml_failure(parser); 2740 2741 XML_ParserFree(parser); 2742 } 2743 END_TEST 2744 2745 START_TEST(test_duplicate_id_attribute_multiple_attlistdecl) { 2746 /* 2747 https://www.w3.org/TR/xml/#attdecls 2748 2749 Test the following statement from the linked specification: 2750 When more than one AttlistDecl is provided for a given element type, 2751 the contents of all those provided are merged. 
2752 */ 2753 const char *text = "<!DOCTYPE doc [\n" 2754 " <!ATTLIST doc identifier ID #REQUIRED>\n" 2755 " <!ATTLIST tag identifier CDATA 'identifier_tag'>\n" 2756 " <!ATTLIST doc identifier CDATA 'ignored'>\n" 2757 "]>\n" 2758 "<doc identifier='doc_identity'><tag></tag></doc>\n"; 2759 AttrInfo doc_info[] 2760 = {{XCS("identifier"), XCS("doc_identity")}, {NULL, NULL}}; 2761 AttrInfo tag_info[] 2762 = {{XCS("identifier"), XCS("identifier_tag")}, {NULL, NULL}}; 2763 ElementInfo info[] = {{XCS("doc"), 1, 0, XCS("identifier"), doc_info}, 2764 {XCS("tag"), 0, 1, NULL, tag_info}, 2765 {NULL, 0, 0, NULL, NULL}}; 2766 2767 XML_Parser parser = XML_ParserCreate(NULL); 2768 assert_true(parser != NULL); 2769 2770 ParserAndElementInfo parserAndElementInfos = { 2771 parser, 2772 info, 2773 }; 2774 2775 XML_SetStartElementHandler(parser, counting_start_element_handler); 2776 XML_SetUserData(parser, &parserAndElementInfos); 2777 2778 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2779 != XML_STATUS_OK) 2780 xml_failure(parser); 2781 2782 XML_ParserFree(parser); 2783 } 2784 END_TEST 2785 2786 /* Test reset works correctly in the middle of processing an internal 2787 * entity. Exercises some obscure code in XML_ParserReset(). 2788 */ 2789 START_TEST(test_reset_in_entity) { 2790 if (g_chunkSize != 0) { 2791 // this test does not use SINGLE_BYTES, because of suspension 2792 return; 2793 } 2794 2795 const char *text = "<!DOCTYPE doc [\n" 2796 "<!ENTITY wombat 'wom'>\n" 2797 "<!ENTITY entity 'hi &wom; there'>\n" 2798 "]>\n" 2799 "<doc>&entity;</doc>"; 2800 XML_ParsingStatus status; 2801 2802 g_resumable = XML_TRUE; 2803 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2804 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 2805 // we won't know exactly how much input we actually managed to give Expat. 
2806 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2807 == XML_STATUS_ERROR) 2808 xml_failure(g_parser); 2809 XML_GetParsingStatus(g_parser, &status); 2810 if (status.parsing != XML_SUSPENDED) 2811 fail("Parsing status not SUSPENDED"); 2812 XML_ParserReset(g_parser, NULL); 2813 XML_GetParsingStatus(g_parser, &status); 2814 if (status.parsing != XML_INITIALIZED) 2815 fail("Parsing status doesn't reset to INITIALIZED"); 2816 } 2817 END_TEST 2818 2819 /* Test that resume correctly passes through parse errors */ 2820 START_TEST(test_resume_invalid_parse) { 2821 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */ 2822 2823 g_resumable = XML_TRUE; 2824 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2825 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2826 == XML_STATUS_ERROR) 2827 xml_failure(g_parser); 2828 if (XML_ResumeParser(g_parser) == XML_STATUS_OK) 2829 fail("Resumed invalid parse not faulted"); 2830 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN) 2831 fail("Invalid parse not correctly faulted"); 2832 } 2833 END_TEST 2834 2835 /* Test that re-suspended parses are correctly passed through */ 2836 START_TEST(test_resume_resuspended) { 2837 const char *text = "<doc>Hello<meep/>world</doc>"; 2838 2839 g_resumable = XML_TRUE; 2840 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2841 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2842 == XML_STATUS_ERROR) 2843 xml_failure(g_parser); 2844 g_resumable = XML_TRUE; 2845 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2846 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 2847 fail("Resumption not suspended"); 2848 /* This one should succeed and finish up */ 2849 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 2850 xml_failure(g_parser); 2851 } 2852 END_TEST 2853 2854 /* Test that CDATA shows up correctly through a default handler */ 2855 START_TEST(test_cdata_default) { 
2856 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>"; 2857 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>"); 2858 CharData storage; 2859 2860 CharData_Init(&storage); 2861 XML_SetUserData(g_parser, &storage); 2862 XML_SetDefaultHandler(g_parser, accumulate_characters); 2863 2864 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2865 == XML_STATUS_ERROR) 2866 xml_failure(g_parser); 2867 CharData_CheckXMLChars(&storage, expected); 2868 } 2869 END_TEST 2870 2871 /* Test resetting a subordinate parser does exactly nothing */ 2872 START_TEST(test_subordinate_reset) { 2873 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2874 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2875 "<doc>&entity;</doc>"; 2876 2877 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2878 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter); 2879 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2880 == XML_STATUS_ERROR) 2881 xml_failure(g_parser); 2882 } 2883 END_TEST 2884 2885 /* Test suspending a subordinate parser */ 2886 START_TEST(test_subordinate_suspend) { 2887 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2888 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2889 "<doc>&entity;</doc>"; 2890 2891 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2892 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender); 2893 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2894 == XML_STATUS_ERROR) 2895 xml_failure(g_parser); 2896 } 2897 END_TEST 2898 2899 /* Test suspending a subordinate parser from an XML declaration */ 2900 /* Increases code coverage of the tests */ 2901 2902 START_TEST(test_subordinate_xdecl_suspend) { 2903 const char *text 2904 = "<!DOCTYPE doc [\n" 2905 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2906 "]>\n" 2907 "<doc>&entity;</doc>"; 2908 2909 XML_SetParamEntityParsing(g_parser, 
XML_PARAM_ENTITY_PARSING_ALWAYS); 2910 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2911 g_resumable = XML_TRUE; 2912 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2913 == XML_STATUS_ERROR) 2914 xml_failure(g_parser); 2915 } 2916 END_TEST 2917 2918 START_TEST(test_subordinate_xdecl_abort) { 2919 const char *text 2920 = "<!DOCTYPE doc [\n" 2921 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2922 "]>\n" 2923 "<doc>&entity;</doc>"; 2924 2925 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2926 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2927 g_resumable = XML_FALSE; 2928 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2929 == XML_STATUS_ERROR) 2930 xml_failure(g_parser); 2931 } 2932 END_TEST 2933 2934 /* Test external entity fault handling with suspension */ 2935 START_TEST(test_ext_entity_invalid_suspended_parse) { 2936 const char *text = "<!DOCTYPE doc [\n" 2937 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2938 "]>\n" 2939 "<doc>&en;</doc>"; 2940 ExtFaults faults[] 2941 = {{"<?xml version='1.0' encoding='us-ascii'?><", 2942 "Incomplete element declaration not faulted", NULL, 2943 XML_ERROR_UNCLOSED_TOKEN}, 2944 {/* First two bytes of a three-byte char */ 2945 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82", 2946 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 2947 {NULL, NULL, NULL, XML_ERROR_NONE}}; 2948 ExtFaults *fault; 2949 2950 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 2951 set_subtest("%s", fault->parse_text); 2952 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2953 XML_SetExternalEntityRefHandler(g_parser, 2954 external_entity_suspending_faulter); 2955 XML_SetUserData(g_parser, fault); 2956 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2957 "Parser did not report external entity error"); 2958 XML_ParserReset(g_parser, NULL); 
2959 } 2960 } 2961 END_TEST 2962 2963 /* Test setting an explicit encoding */ 2964 START_TEST(test_explicit_encoding) { 2965 const char *text1 = "<doc>Hello "; 2966 const char *text2 = " World</doc>"; 2967 2968 /* Just check that we can set the encoding to NULL before starting */ 2969 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2970 fail("Failed to initialise encoding to NULL"); 2971 /* Say we are UTF-8 */ 2972 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK) 2973 fail("Failed to set explicit encoding"); 2974 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2975 == XML_STATUS_ERROR) 2976 xml_failure(g_parser); 2977 /* Try to switch encodings mid-parse */ 2978 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR) 2979 fail("Allowed encoding change"); 2980 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2981 == XML_STATUS_ERROR) 2982 xml_failure(g_parser); 2983 /* Try now the parse is over */ 2984 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2985 fail("Failed to unset encoding"); 2986 } 2987 END_TEST 2988 2989 /* Test handling of trailing CR (rather than newline) */ 2990 START_TEST(test_trailing_cr) { 2991 const char *text = "<doc>\r"; 2992 int found_cr; 2993 2994 /* Try with a character handler, for code coverage */ 2995 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler); 2996 XML_SetUserData(g_parser, &found_cr); 2997 found_cr = 0; 2998 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2999 == XML_STATUS_OK) 3000 fail("Failed to fault unclosed doc"); 3001 if (found_cr == 0) 3002 fail("Did not catch the carriage return"); 3003 XML_ParserReset(g_parser, NULL); 3004 3005 /* Now with a default handler instead */ 3006 XML_SetDefaultHandler(g_parser, cr_cdata_handler); 3007 XML_SetUserData(g_parser, &found_cr); 3008 found_cr = 0; 3009 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3010 == XML_STATUS_OK) 3011 
fail("Failed to fault unclosed doc"); 3012 if (found_cr == 0) 3013 fail("Did not catch default carriage return"); 3014 } 3015 END_TEST 3016 3017 /* Test trailing CR in an external entity parse */ 3018 START_TEST(test_ext_entity_trailing_cr) { 3019 const char *text = "<!DOCTYPE doc [\n" 3020 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 3021 "]>\n" 3022 "<doc>&en;</doc>"; 3023 int found_cr; 3024 3025 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3026 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher); 3027 XML_SetUserData(g_parser, &found_cr); 3028 found_cr = 0; 3029 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3030 != XML_STATUS_OK) 3031 xml_failure(g_parser); 3032 if (found_cr == 0) 3033 fail("No carriage return found"); 3034 XML_ParserReset(g_parser, NULL); 3035 3036 /* Try again with a different trailing CR */ 3037 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3038 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher); 3039 XML_SetUserData(g_parser, &found_cr); 3040 found_cr = 0; 3041 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3042 != XML_STATUS_OK) 3043 xml_failure(g_parser); 3044 if (found_cr == 0) 3045 fail("No carriage return found"); 3046 } 3047 END_TEST 3048 3049 /* Test handling of trailing square bracket */ 3050 START_TEST(test_trailing_rsqb) { 3051 const char *text8 = "<doc>]"; 3052 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; 3053 int found_rsqb; 3054 int text8_len = (int)strlen(text8); 3055 3056 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 3057 XML_SetUserData(g_parser, &found_rsqb); 3058 found_rsqb = 0; 3059 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE) 3060 == XML_STATUS_OK) 3061 fail("Failed to fault unclosed doc"); 3062 if (found_rsqb == 0) 3063 fail("Did not catch the right square bracket"); 3064 3065 /* Try again with a different encoding */ 3066 
XML_ParserReset(g_parser, NULL); 3067 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 3068 XML_SetUserData(g_parser, &found_rsqb); 3069 found_rsqb = 0; 3070 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 3071 XML_TRUE) 3072 == XML_STATUS_OK) 3073 fail("Failed to fault unclosed doc"); 3074 if (found_rsqb == 0) 3075 fail("Did not catch the right square bracket"); 3076 3077 /* And finally with a default handler */ 3078 XML_ParserReset(g_parser, NULL); 3079 XML_SetDefaultHandler(g_parser, rsqb_handler); 3080 XML_SetUserData(g_parser, &found_rsqb); 3081 found_rsqb = 0; 3082 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 3083 XML_TRUE) 3084 == XML_STATUS_OK) 3085 fail("Failed to fault unclosed doc"); 3086 if (found_rsqb == 0) 3087 fail("Did not catch the right square bracket"); 3088 } 3089 END_TEST 3090 3091 /* Test trailing right square bracket in an external entity parse */ 3092 START_TEST(test_ext_entity_trailing_rsqb) { 3093 const char *text = "<!DOCTYPE doc [\n" 3094 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 3095 "]>\n" 3096 "<doc>&en;</doc>"; 3097 int found_rsqb; 3098 3099 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3100 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher); 3101 XML_SetUserData(g_parser, &found_rsqb); 3102 found_rsqb = 0; 3103 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3104 != XML_STATUS_OK) 3105 xml_failure(g_parser); 3106 if (found_rsqb == 0) 3107 fail("No right square bracket found"); 3108 } 3109 END_TEST 3110 3111 /* Test CDATA handling in an external entity */ 3112 START_TEST(test_ext_entity_good_cdata) { 3113 const char *text = "<!DOCTYPE doc [\n" 3114 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 3115 "]>\n" 3116 "<doc>&en;</doc>"; 3117 3118 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3119 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii); 
3120 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3121 != XML_STATUS_OK) 3122 xml_failure(g_parser); 3123 } 3124 END_TEST 3125 3126 /* Test user parameter settings */ 3127 START_TEST(test_user_parameters) { 3128 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 3129 "<!-- Primary parse -->\n" 3130 "<!DOCTYPE doc SYSTEM 'foo'>\n" 3131 "<doc>&entity;"; 3132 const char *epilog = "<!-- Back to primary parser -->\n" 3133 "</doc>"; 3134 3135 g_comment_count = 0; 3136 g_skip_count = 0; 3137 g_xdecl_count = 0; 3138 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3139 XML_SetXmlDeclHandler(g_parser, xml_decl_handler); 3140 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker); 3141 XML_SetCommentHandler(g_parser, data_check_comment_handler); 3142 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler); 3143 XML_UseParserAsHandlerArg(g_parser); 3144 XML_SetUserData(g_parser, (void *)1); 3145 g_handler_data = g_parser; 3146 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3147 == XML_STATUS_ERROR) 3148 xml_failure(g_parser); 3149 /* Ensure we can't change policy mid-parse */ 3150 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) 3151 fail("Changed param entity parsing policy while parsing"); 3152 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) 3153 == XML_STATUS_ERROR) 3154 xml_failure(g_parser); 3155 if (g_comment_count != 3) 3156 fail("Comment handler not invoked enough times"); 3157 if (g_skip_count != 1) 3158 fail("Skip handler not invoked enough times"); 3159 if (g_xdecl_count != 1) 3160 fail("XML declaration handler not invoked"); 3161 } 3162 END_TEST 3163 3164 /* Test that an explicit external entity handler argument replaces 3165 * the parser as the first argument. 
3166 * 3167 * We do not call the first parameter to the external entity handler 3168 * 'parser' for once, since the first time the handler is called it 3169 * will actually be a text string. We need to be able to access the 3170 * global 'parser' variable to create our external entity parser from, 3171 * since there are code paths we need to ensure get executed. 3172 */ 3173 START_TEST(test_ext_entity_ref_parameter) { 3174 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 3175 "<!DOCTYPE doc SYSTEM 'foo'>\n" 3176 "<doc>&entity;</doc>"; 3177 3178 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3179 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 3180 /* Set a handler arg that is not NULL and not parser (which is 3181 * what NULL would cause to be passed. 3182 */ 3183 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text); 3184 g_handler_data = text; 3185 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3186 == XML_STATUS_ERROR) 3187 xml_failure(g_parser); 3188 3189 /* Now try again with unset args */ 3190 XML_ParserReset(g_parser, NULL); 3191 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3192 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 3193 XML_SetExternalEntityRefHandlerArg(g_parser, NULL); 3194 g_handler_data = g_parser; 3195 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3196 == XML_STATUS_ERROR) 3197 xml_failure(g_parser); 3198 } 3199 END_TEST 3200 3201 /* Test the parsing of an empty string */ 3202 START_TEST(test_empty_parse) { 3203 const char *text = "<doc></doc>"; 3204 const char *partial = "<doc>"; 3205 3206 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) 3207 fail("Parsing empty string faulted"); 3208 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 3209 fail("Parsing final empty string not faulted"); 3210 if (XML_GetErrorCode(g_parser) != 
XML_ERROR_NO_ELEMENTS) 3211 fail("Parsing final empty string faulted for wrong reason"); 3212 3213 /* Now try with valid text before the empty end */ 3214 XML_ParserReset(g_parser, NULL); 3215 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3216 == XML_STATUS_ERROR) 3217 xml_failure(g_parser); 3218 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) 3219 fail("Parsing final empty string faulted"); 3220 3221 /* Now try with invalid text before the empty end */ 3222 XML_ParserReset(g_parser, NULL); 3223 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial), 3224 XML_FALSE) 3225 == XML_STATUS_ERROR) 3226 xml_failure(g_parser); 3227 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 3228 fail("Parsing final incomplete empty string not faulted"); 3229 } 3230 END_TEST 3231 3232 /* Test XML_Parse for len < 0 */ 3233 START_TEST(test_negative_len_parse) { 3234 const char *const doc = "<root/>"; 3235 for (int isFinal = 0; isFinal < 2; isFinal++) { 3236 set_subtest("isFinal=%d", isFinal); 3237 3238 XML_Parser parser = XML_ParserCreate(NULL); 3239 3240 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 3241 fail("There was not supposed to be any initial parse error."); 3242 3243 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal); 3244 3245 if (status != XML_STATUS_ERROR) 3246 fail("Negative len was expected to fail the parse but did not."); 3247 3248 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 3249 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 3250 3251 XML_ParserFree(parser); 3252 } 3253 } 3254 END_TEST 3255 3256 /* Test XML_ParseBuffer for len < 0 */ 3257 START_TEST(test_negative_len_parse_buffer) { 3258 const char *const doc = "<root/>"; 3259 for (int isFinal = 0; isFinal < 2; isFinal++) { 3260 set_subtest("isFinal=%d", isFinal); 3261 3262 XML_Parser parser = XML_ParserCreate(NULL); 3263 3264 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 3265 fail("There was not 
supposed to be any initial parse error."); 3266 3267 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc)); 3268 3269 if (buffer == NULL) 3270 fail("XML_GetBuffer failed."); 3271 3272 memcpy(buffer, doc, strlen(doc)); 3273 3274 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal); 3275 3276 if (status != XML_STATUS_ERROR) 3277 fail("Negative len was expected to fail the parse but did not."); 3278 3279 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 3280 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 3281 3282 XML_ParserFree(parser); 3283 } 3284 } 3285 END_TEST 3286 3287 /* Test odd corners of the XML_GetBuffer interface */ 3288 static enum XML_Status 3289 get_feature(enum XML_FeatureEnum feature_id, long *presult) { 3290 const XML_Feature *feature = XML_GetFeatureList(); 3291 3292 if (feature == NULL) 3293 return XML_STATUS_ERROR; 3294 for (; feature->feature != XML_FEATURE_END; feature++) { 3295 if (feature->feature == feature_id) { 3296 *presult = feature->value; 3297 return XML_STATUS_OK; 3298 } 3299 } 3300 return XML_STATUS_ERROR; 3301 } 3302 3303 /* Test odd corners of the XML_GetBuffer interface */ 3304 START_TEST(test_get_buffer_1) { 3305 const char *text = get_buffer_test_text; 3306 long context_bytes; 3307 3308 /* Attempt to allocate a negative length buffer */ 3309 if (XML_GetBuffer(g_parser, -12) != NULL) 3310 fail("Negative length buffer not failed"); 3311 3312 /* Now get a small buffer and extend it past valid length */ 3313 void *const buffer = XML_GetBuffer(g_parser, 1536); 3314 if (buffer == NULL) 3315 fail("1.5K buffer failed"); 3316 assert(buffer != NULL); 3317 memcpy(buffer, text, strlen(text)); 3318 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 3319 == XML_STATUS_ERROR) 3320 xml_failure(g_parser); 3321 if (XML_GetBuffer(g_parser, INT_MAX) != NULL) 3322 fail("INT_MAX buffer not failed"); 3323 3324 /* Now try extending it a more reasonable but still too large 3325 * amount. 
The allocator in XML_GetBuffer() doubles the buffer 3326 * size until it exceeds the requested amount or INT_MAX. If it 3327 * exceeds INT_MAX, it rejects the request, so we want a request 3328 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable, 3329 * with an extra byte just to ensure that the request is off any 3330 * boundary. The request will be inflated internally by 3331 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our 3332 * request. 3333 */ 3334 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK) 3335 context_bytes = 0; 3336 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL) 3337 fail("INT_MAX- buffer not failed"); 3338 3339 /* Now try extending it a carefully crafted amount */ 3340 if (XML_GetBuffer(g_parser, 1000) == NULL) 3341 fail("1000 buffer failed"); 3342 } 3343 END_TEST 3344 3345 /* Test more corners of the XML_GetBuffer interface */ 3346 START_TEST(test_get_buffer_2) { 3347 const char *text = get_buffer_test_text; 3348 3349 /* Now get a decent buffer */ 3350 void *const buffer = XML_GetBuffer(g_parser, 1536); 3351 if (buffer == NULL) 3352 fail("1.5K buffer failed"); 3353 assert(buffer != NULL); 3354 memcpy(buffer, text, strlen(text)); 3355 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 3356 == XML_STATUS_ERROR) 3357 xml_failure(g_parser); 3358 3359 /* Extend it, to catch a different code path */ 3360 if (XML_GetBuffer(g_parser, 1024) == NULL) 3361 fail("1024 buffer failed"); 3362 } 3363 END_TEST 3364 3365 /* Test for signed integer overflow CVE-2022-23852 */ 3366 #if XML_CONTEXT_BYTES > 0 3367 START_TEST(test_get_buffer_3_overflow) { 3368 XML_Parser parser = XML_ParserCreate(NULL); 3369 assert(parser != NULL); 3370 3371 const char *const text = "\n"; 3372 const int expectedKeepValue = (int)strlen(text); 3373 3374 // After this call, variable "keep" in XML_GetBuffer will 3375 // have value expectedKeepValue 3376 if (_XML_Parse_SINGLE_BYTES(parser, text, 
(int)strlen(text), 3377 XML_FALSE /* isFinal */) 3378 == XML_STATUS_ERROR) 3379 xml_failure(parser); 3380 3381 assert(expectedKeepValue > 0); 3382 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) 3383 fail("enlarging buffer not failed"); 3384 3385 XML_ParserFree(parser); 3386 } 3387 END_TEST 3388 #endif // XML_CONTEXT_BYTES > 0 3389 3390 START_TEST(test_buffer_can_grow_to_max) { 3391 const char *const prefixes[] = { 3392 "", 3393 "<", 3394 "<x a='", 3395 "<doc><x a='", 3396 "<document><x a='", 3397 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" 3398 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" 3399 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" 3400 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" 3401 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; 3402 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); 3403 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow 3404 #if defined(__MINGW32__) && ! defined(__MINGW64__) 3405 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB 3406 // Can we make a big allocation? 3407 for (int i = 1; i <= 2; i++) { 3408 void *const big = malloc(maxbuf); 3409 if (big != NULL) { 3410 free(big); 3411 break; 3412 } 3413 // The big allocation failed. Let's be a little lenient. 3414 maxbuf = maxbuf / 2; 3415 fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf); 3416 } 3417 #endif 3418 3419 for (int i = 0; i < num_prefixes; ++i) { 3420 set_subtest("\"%s\"", prefixes[i]); 3421 XML_Parser parser = XML_ParserCreate(NULL); 3422 #if XML_GE == 1 3423 assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1) 3424 == XML_TRUE); // i.e. 
deactivate 3425 #endif 3426 const int prefix_len = (int)strlen(prefixes[i]); 3427 const enum XML_Status s 3428 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); 3429 if (s != XML_STATUS_OK) 3430 xml_failure(parser); 3431 3432 // XML_CONTEXT_BYTES of the prefix may remain in the buffer; 3433 // subtracting the whole prefix is easiest, and close enough. 3434 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); 3435 // The limit should be consistent; no prefix should allow us to 3436 // reach above the max buffer size. 3437 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); 3438 XML_ParserFree(parser); 3439 } 3440 } 3441 END_TEST 3442 3443 START_TEST(test_getbuffer_allocates_on_zero_len) { 3444 for (int first_len = 1; first_len >= 0; first_len--) { 3445 set_subtest("with len=%d first", first_len); 3446 XML_Parser parser = XML_ParserCreate(NULL); 3447 assert_true(parser != NULL); 3448 assert_true(XML_GetBuffer(parser, first_len) != NULL); 3449 assert_true(XML_GetBuffer(parser, 0) != NULL); 3450 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) 3451 xml_failure(parser); 3452 XML_ParserFree(parser); 3453 } 3454 } 3455 END_TEST 3456 3457 /* Test position information macros */ 3458 START_TEST(test_byte_info_at_end) { 3459 const char *text = "<doc></doc>"; 3460 3461 if (XML_GetCurrentByteIndex(g_parser) != -1 3462 || XML_GetCurrentByteCount(g_parser) != 0) 3463 fail("Byte index/count incorrect at start of parse"); 3464 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3465 == XML_STATUS_ERROR) 3466 xml_failure(g_parser); 3467 /* At end, the count will be zero and the index the end of string */ 3468 if (XML_GetCurrentByteCount(g_parser) != 0) 3469 fail("Terminal byte count incorrect"); 3470 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text)) 3471 fail("Terminal byte index incorrect"); 3472 } 3473 END_TEST 3474 3475 /* Test position information from errors */ 3476 #define PRE_ERROR_STR 
"<doc></" 3477 #define POST_ERROR_STR "wombat></doc>" 3478 START_TEST(test_byte_info_at_error) { 3479 const char *text = PRE_ERROR_STR POST_ERROR_STR; 3480 3481 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3482 == XML_STATUS_OK) 3483 fail("Syntax error not faulted"); 3484 if (XML_GetCurrentByteCount(g_parser) != 0) 3485 fail("Error byte count incorrect"); 3486 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR)) 3487 fail("Error byte index incorrect"); 3488 } 3489 END_TEST 3490 #undef PRE_ERROR_STR 3491 #undef POST_ERROR_STR 3492 3493 /* Test position information in handler */ 3494 #define START_ELEMENT "<e>" 3495 #define CDATA_TEXT "Hello" 3496 #define END_ELEMENT "</e>" 3497 START_TEST(test_byte_info_at_cdata) { 3498 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; 3499 int offset, size; 3500 ByteTestData data; 3501 3502 /* Check initial context is empty */ 3503 if (XML_GetInputContext(g_parser, &offset, &size) != NULL) 3504 fail("Unexpected context at start of parse"); 3505 3506 data.start_element_len = (int)strlen(START_ELEMENT); 3507 data.cdata_len = (int)strlen(CDATA_TEXT); 3508 data.total_string_len = (int)strlen(text); 3509 XML_SetCharacterDataHandler(g_parser, byte_character_handler); 3510 XML_SetUserData(g_parser, &data); 3511 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 3512 xml_failure(g_parser); 3513 } 3514 END_TEST 3515 #undef START_ELEMENT 3516 #undef CDATA_TEXT 3517 #undef END_ELEMENT 3518 3519 /* Test predefined entities are correctly recognised */ 3520 START_TEST(test_predefined_entities) { 3521 const char *text = "<doc><>&"'</doc>"; 3522 const XML_Char *expected = XCS("<doc><>&"'</doc>"); 3523 const XML_Char *result = XCS("<>&\"'"); 3524 CharData storage; 3525 3526 XML_SetDefaultHandler(g_parser, accumulate_characters); 3527 /* run_character_check uses XML_SetCharacterDataHandler(), which 3528 * unfortunately heads off a code path that we need to exercise. 
3529 */ 3530 CharData_Init(&storage); 3531 XML_SetUserData(g_parser, &storage); 3532 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3533 == XML_STATUS_ERROR) 3534 xml_failure(g_parser); 3535 /* The default handler doesn't translate the entities */ 3536 CharData_CheckXMLChars(&storage, expected); 3537 3538 /* Now try again and check the translation */ 3539 XML_ParserReset(g_parser, NULL); 3540 run_character_check(text, result); 3541 } 3542 END_TEST 3543 3544 /* Regression test that an invalid tag in an external parameter 3545 * reference in an external DTD is correctly faulted. 3546 * 3547 * Only a few specific tags are legal in DTDs ignoring comments and 3548 * processing instructions, all of which begin with an exclamation 3549 * mark. "<el/>" is not one of them, so the parser should raise an 3550 * error on encountering it. 3551 */ 3552 START_TEST(test_invalid_tag_in_dtd) { 3553 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3554 "<doc></doc>\n"; 3555 3556 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3557 XML_SetExternalEntityRefHandler(g_parser, external_entity_param); 3558 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3559 "Invalid tag IN DTD external param not rejected"); 3560 } 3561 END_TEST 3562 3563 /* Test entities not quite the predefined ones are not mis-recognised */ 3564 START_TEST(test_not_predefined_entities) { 3565 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>", 3566 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL}; 3567 int i = 0; 3568 3569 while (text[i] != NULL) { 3570 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, 3571 "Undefined entity not rejected"); 3572 XML_ParserReset(g_parser, NULL); 3573 i++; 3574 } 3575 } 3576 END_TEST 3577 3578 /* Test conditional inclusion (IGNORE) */ 3579 START_TEST(test_ignore_section) { 3580 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3581 "<doc><e>&entity;</e></doc>"; 3582 const XML_Char *expected 3583 = 
XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;"); 3584 CharData storage; 3585 3586 CharData_Init(&storage); 3587 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3588 XML_SetUserData(g_parser, &storage); 3589 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore); 3590 XML_SetDefaultHandler(g_parser, accumulate_characters); 3591 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3592 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3593 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3594 XML_SetStartElementHandler(g_parser, dummy_start_element); 3595 XML_SetEndElementHandler(g_parser, dummy_end_element); 3596 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3597 == XML_STATUS_ERROR) 3598 xml_failure(g_parser); 3599 CharData_CheckXMLChars(&storage, expected); 3600 } 3601 END_TEST 3602 3603 START_TEST(test_ignore_section_utf16) { 3604 const char text[] = 3605 /* <!DOCTYPE d SYSTEM 's'> */ 3606 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3607 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" 3608 /* <d><e>&en;</e></d> */ 3609 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; 3610 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3611 CharData storage; 3612 3613 CharData_Init(&storage); 3614 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3615 XML_SetUserData(g_parser, &storage); 3616 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16); 3617 XML_SetDefaultHandler(g_parser, accumulate_characters); 3618 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3619 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3620 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3621 XML_SetStartElementHandler(g_parser, dummy_start_element); 3622 XML_SetEndElementHandler(g_parser, dummy_end_element); 3623 if (_XML_Parse_SINGLE_BYTES(g_parser, text, 
(int)sizeof(text) - 1, XML_TRUE) 3624 == XML_STATUS_ERROR) 3625 xml_failure(g_parser); 3626 CharData_CheckXMLChars(&storage, expected); 3627 } 3628 END_TEST 3629 3630 START_TEST(test_ignore_section_utf16_be) { 3631 const char text[] = 3632 /* <!DOCTYPE d SYSTEM 's'> */ 3633 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3634 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" 3635 /* <d><e>&en;</e></d> */ 3636 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; 3637 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3638 CharData storage; 3639 3640 CharData_Init(&storage); 3641 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3642 XML_SetUserData(g_parser, &storage); 3643 XML_SetExternalEntityRefHandler(g_parser, 3644 external_entity_load_ignore_utf16_be); 3645 XML_SetDefaultHandler(g_parser, accumulate_characters); 3646 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3647 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3648 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3649 XML_SetStartElementHandler(g_parser, dummy_start_element); 3650 XML_SetEndElementHandler(g_parser, dummy_end_element); 3651 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3652 == XML_STATUS_ERROR) 3653 xml_failure(g_parser); 3654 CharData_CheckXMLChars(&storage, expected); 3655 } 3656 END_TEST 3657 3658 /* Test mis-formatted conditional exclusion */ 3659 START_TEST(test_bad_ignore_section) { 3660 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3661 "<doc><e>&entity;</e></doc>"; 3662 ExtFaults faults[] 3663 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL, 3664 XML_ERROR_SYNTAX}, 3665 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL, 3666 XML_ERROR_INVALID_TOKEN}, 3667 {/* FIrst two bytes of a three-byte char */ 3668 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL, 3669 XML_ERROR_PARTIAL_CHAR}, 3670 {NULL, NULL, NULL, 
XML_ERROR_NONE}}; 3671 ExtFaults *fault; 3672 3673 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 3674 set_subtest("%s", fault->parse_text); 3675 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3676 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3677 XML_SetUserData(g_parser, fault); 3678 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3679 "Incomplete IGNORE section not failed"); 3680 XML_ParserReset(g_parser, NULL); 3681 } 3682 } 3683 END_TEST 3684 3685 struct bom_testdata { 3686 const char *external; 3687 int split; 3688 XML_Bool nested_callback_happened; 3689 }; 3690 3691 static int XMLCALL 3692 external_bom_checker(XML_Parser parser, const XML_Char *context, 3693 const XML_Char *base, const XML_Char *systemId, 3694 const XML_Char *publicId) { 3695 const char *text; 3696 UNUSED_P(base); 3697 UNUSED_P(systemId); 3698 UNUSED_P(publicId); 3699 3700 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); 3701 if (ext_parser == NULL) 3702 fail("Could not create external entity parser"); 3703 3704 if (! xcstrcmp(systemId, XCS("004-2.ent"))) { 3705 struct bom_testdata *const testdata = XML_GetUserData(parser); 3706 const char *const external = testdata->external; 3707 const int split = testdata->split; 3708 testdata->nested_callback_happened = XML_TRUE; 3709 3710 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) 3711 != XML_STATUS_OK) { 3712 xml_failure(ext_parser); 3713 } 3714 text = external + split; // the parse below will continue where we left off. 3715 } else if (! 
xcstrcmp(systemId, XCS("004-1.ent"))) { 3716 text = "<!ELEMENT doc EMPTY>\n" 3717 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" 3718 "<!ENTITY % e2 '%e1;'>\n"; 3719 } else { 3720 fail("unknown systemId"); 3721 } 3722 3723 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) 3724 != XML_STATUS_OK) 3725 xml_failure(ext_parser); 3726 3727 XML_ParserFree(ext_parser); 3728 return XML_STATUS_OK; 3729 } 3730 3731 /* regression test: BOM should be consumed when followed by a partial token. */ 3732 START_TEST(test_external_bom_consumed) { 3733 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3734 "<doc></doc>\n"; 3735 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; 3736 const int len = (int)strlen(external); 3737 for (int split = 0; split <= len; ++split) { 3738 set_subtest("split at byte %d", split); 3739 3740 struct bom_testdata testdata; 3741 testdata.external = external; 3742 testdata.split = split; 3743 testdata.nested_callback_happened = XML_FALSE; 3744 3745 XML_Parser parser = XML_ParserCreate(NULL); 3746 if (parser == NULL) { 3747 fail("Couldn't create parser"); 3748 } 3749 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3750 XML_SetExternalEntityRefHandler(parser, external_bom_checker); 3751 XML_SetUserData(parser, &testdata); 3752 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 3753 == XML_STATUS_ERROR) 3754 xml_failure(parser); 3755 if (! 
testdata.nested_callback_happened) { 3756 fail("ref handler not called"); 3757 } 3758 XML_ParserFree(parser); 3759 } 3760 } 3761 END_TEST 3762 3763 /* Test recursive parsing */ 3764 START_TEST(test_external_entity_values) { 3765 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3766 "<doc></doc>\n"; 3767 ExtFaults data_004_2[] = { 3768 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE}, 3769 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL, 3770 XML_ERROR_INVALID_TOKEN}, 3771 {"'wombat", "Unterminated string not faulted", NULL, 3772 XML_ERROR_UNCLOSED_TOKEN}, 3773 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL, 3774 XML_ERROR_PARTIAL_CHAR}, 3775 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE}, 3776 {"<?xml?>", "Malformed XML declaration not faulted", NULL, 3777 XML_ERROR_XML_DECL}, 3778 {/* UTF-8 BOM */ 3779 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, 3780 XML_ERROR_NONE}, 3781 {"<?xml version='1.0' encoding='utf-8'?>\n$", 3782 "Invalid token after text declaration not faulted", NULL, 3783 XML_ERROR_INVALID_TOKEN}, 3784 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat", 3785 "Unterminated string after text decl not faulted", NULL, 3786 XML_ERROR_UNCLOSED_TOKEN}, 3787 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82", 3788 "Partial UTF-8 character after text decl not faulted", NULL, 3789 XML_ERROR_PARTIAL_CHAR}, 3790 {"%e1;", "Recursive parameter entity not faulted", NULL, 3791 XML_ERROR_RECURSIVE_ENTITY_REF}, 3792 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3793 int i; 3794 3795 for (i = 0; data_004_2[i].parse_text != NULL; i++) { 3796 set_subtest("%s", data_004_2[i].parse_text); 3797 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3798 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); 3799 XML_SetUserData(g_parser, &data_004_2[i]); 3800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3801 == XML_STATUS_ERROR) 3802 
xml_failure(g_parser); 3803 XML_ParserReset(g_parser, NULL); 3804 } 3805 } 3806 END_TEST 3807 3808 /* Test the recursive parse interacts with a not standalone handler */ 3809 START_TEST(test_ext_entity_not_standalone) { 3810 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3811 "<doc></doc>"; 3812 3813 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3814 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone); 3815 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3816 "Standalone rejection not caught"); 3817 } 3818 END_TEST 3819 3820 START_TEST(test_ext_entity_value_abort) { 3821 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3822 "<doc></doc>\n"; 3823 3824 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3825 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter); 3826 g_resumable = XML_FALSE; 3827 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3828 == XML_STATUS_ERROR) 3829 xml_failure(g_parser); 3830 } 3831 END_TEST 3832 3833 START_TEST(test_bad_public_doctype) { 3834 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 3835 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n" 3836 "<doc></doc>"; 3837 3838 /* Setting a handler provokes a particular code path */ 3839 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, 3840 dummy_end_doctype_handler); 3841 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); 3842 } 3843 END_TEST 3844 3845 /* Test based on ibm/valid/P32/ibm32v04.xml */ 3846 START_TEST(test_attribute_enum_value) { 3847 const char *text = "<?xml version='1.0' standalone='no'?>\n" 3848 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" 3849 "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; 3850 ExtTest dtd_data 3851 = {"<!ELEMENT animal (#PCDATA|a)*>\n" 3852 "<!ELEMENT a EMPTY>\n" 3853 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>", 3854 NULL, NULL}; 3855 const XML_Char *expected = XCS("This 
is a \n \n\nyellow tiger"); 3856 3857 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3858 XML_SetUserData(g_parser, &dtd_data); 3859 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3860 /* An attribute list handler provokes a different code path */ 3861 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 3862 run_ext_character_check(text, &dtd_data, expected); 3863 } 3864 END_TEST 3865 3866 /* Slightly bizarrely, the library seems to silently ignore entity 3867 * definitions for predefined entities, even when they are wrong. The 3868 * language of the XML 1.0 spec is somewhat unhelpful as to what ought 3869 * to happen, so this is currently treated as acceptable. 3870 */ 3871 START_TEST(test_predefined_entity_redefinition) { 3872 const char *text = "<!DOCTYPE doc [\n" 3873 "<!ENTITY apos 'foo'>\n" 3874 "]>\n" 3875 "<doc>'</doc>"; 3876 run_character_check(text, XCS("'")); 3877 } 3878 END_TEST 3879 3880 /* Test that the parser stops processing the DTD after an unresolved 3881 * parameter entity is encountered. 
3882 */ 3883 START_TEST(test_dtd_stop_processing) { 3884 const char *text = "<!DOCTYPE doc [\n" 3885 "%foo;\n" 3886 "<!ENTITY bar 'bas'>\n" 3887 "]><doc/>"; 3888 3889 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 3890 init_dummy_handlers(); 3891 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3892 == XML_STATUS_ERROR) 3893 xml_failure(g_parser); 3894 if (get_dummy_handler_flags() != 0) 3895 fail("DTD processing still going after undefined PE"); 3896 } 3897 END_TEST 3898 3899 /* Test public notations with no system ID */ 3900 START_TEST(test_public_notation_no_sysid) { 3901 const char *text = "<!DOCTYPE doc [\n" 3902 "<!NOTATION note PUBLIC 'foo'>\n" 3903 "<!ELEMENT doc EMPTY>\n" 3904 "]>\n<doc/>"; 3905 3906 init_dummy_handlers(); 3907 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 3908 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3909 == XML_STATUS_ERROR) 3910 xml_failure(g_parser); 3911 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG) 3912 fail("Notation declaration handler not called"); 3913 } 3914 END_TEST 3915 3916 START_TEST(test_nested_groups) { 3917 const char *text 3918 = "<!DOCTYPE doc [\n" 3919 "<!ELEMENT doc " 3920 /* Sixteen elements per line */ 3921 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," 3922 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" 
3923 "))))))))))))))))))))))))))))))))>\n" 3924 "<!ELEMENT e EMPTY>" 3925 "]>\n" 3926 "<doc><e/></doc>"; 3927 CharData storage; 3928 3929 CharData_Init(&storage); 3930 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3931 XML_SetStartElementHandler(g_parser, record_element_start_handler); 3932 XML_SetUserData(g_parser, &storage); 3933 init_dummy_handlers(); 3934 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3935 == XML_STATUS_ERROR) 3936 xml_failure(g_parser); 3937 CharData_CheckXMLChars(&storage, XCS("doce")); 3938 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3939 fail("Element handler not fired"); 3940 } 3941 END_TEST 3942 3943 START_TEST(test_group_choice) { 3944 const char *text = "<!DOCTYPE doc [\n" 3945 "<!ELEMENT doc (a|b|c)+>\n" 3946 "<!ELEMENT a EMPTY>\n" 3947 "<!ELEMENT b (#PCDATA)>\n" 3948 "<!ELEMENT c ANY>\n" 3949 "]>\n" 3950 "<doc>\n" 3951 "<a/>\n" 3952 "<b attr='foo'>This is a foo</b>\n" 3953 "<c></c>\n" 3954 "</doc>\n"; 3955 3956 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3957 init_dummy_handlers(); 3958 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3959 == XML_STATUS_ERROR) 3960 xml_failure(g_parser); 3961 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3962 fail("Element handler flag not raised"); 3963 } 3964 END_TEST 3965 3966 START_TEST(test_standalone_parameter_entity) { 3967 const char *text = "<?xml version='1.0' standalone='yes'?>\n" 3968 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n" 3969 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n" 3970 "%entity;\n" 3971 "]>\n" 3972 "<doc></doc>"; 3973 char dtd_data[] = "<!ENTITY % e1 'foo'>\n"; 3974 3975 XML_SetUserData(g_parser, dtd_data); 3976 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3977 XML_SetExternalEntityRefHandler(g_parser, external_entity_public); 3978 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3979 == 
XML_STATUS_ERROR) 3980 xml_failure(g_parser); 3981 } 3982 END_TEST 3983 3984 /* Test skipping of parameter entity in an external DTD */ 3985 /* Derived from ibm/invalid/P69/ibm69i01.xml */ 3986 START_TEST(test_skipped_parameter_entity) { 3987 const char *text = "<?xml version='1.0'?>\n" 3988 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3989 "<!ELEMENT root (#PCDATA|a)* >\n" 3990 "]>\n" 3991 "<root></root>"; 3992 ExtTest dtd_data = {"%pe2;", NULL, NULL}; 3993 3994 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3995 XML_SetUserData(g_parser, &dtd_data); 3996 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3997 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler); 3998 init_dummy_handlers(); 3999 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4000 == XML_STATUS_ERROR) 4001 xml_failure(g_parser); 4002 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG) 4003 fail("Skip handler not executed"); 4004 } 4005 END_TEST 4006 4007 /* Test recursive parameter entity definition rejected in external DTD */ 4008 START_TEST(test_recursive_external_parameter_entity) { 4009 const char *text = "<?xml version='1.0'?>\n" 4010 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 4011 "<!ELEMENT root (#PCDATA|a)* >\n" 4012 "]>\n" 4013 "<root></root>"; 4014 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;", 4015 "Recursive external parameter entity not faulted", NULL, 4016 XML_ERROR_RECURSIVE_ENTITY_REF}; 4017 4018 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 4019 XML_SetUserData(g_parser, &dtd_data); 4020 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4021 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 4022 "Recursive external parameter not spotted"); 4023 } 4024 END_TEST 4025 4026 /* Test undefined parameter entity in external entity handler */ 4027 START_TEST(test_undefined_ext_entity_in_external_dtd) { 4028 const char *text = 
"<!DOCTYPE doc SYSTEM 'foo'>\n" 4029 "<doc></doc>\n"; 4030 4031 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4032 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 4033 XML_SetUserData(g_parser, NULL); 4034 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4035 == XML_STATUS_ERROR) 4036 xml_failure(g_parser); 4037 4038 /* Now repeat without the external entity ref handler invoking 4039 * another copy of itself. 4040 */ 4041 XML_ParserReset(g_parser, NULL); 4042 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4043 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 4044 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */ 4045 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4046 == XML_STATUS_ERROR) 4047 xml_failure(g_parser); 4048 } 4049 END_TEST 4050 4051 /* Test suspending the parse on receiving an XML declaration works */ 4052 START_TEST(test_suspend_xdecl) { 4053 const char *text = long_character_data_text; 4054 4055 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler); 4056 XML_SetUserData(g_parser, g_parser); 4057 g_resumable = XML_TRUE; 4058 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 4059 // we won't know exactly how much input we actually managed to give Expat. 
4060 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4061 != XML_STATUS_SUSPENDED) 4062 xml_failure(g_parser); 4063 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 4064 xml_failure(g_parser); 4065 /* Attempt to start a new parse while suspended */ 4066 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4067 != XML_STATUS_ERROR) 4068 fail("Attempt to parse while suspended not faulted"); 4069 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 4070 fail("Suspended parse not faulted with correct error"); 4071 } 4072 END_TEST 4073 4074 /* Test aborting the parse in an epilog works */ 4075 START_TEST(test_abort_epilog) { 4076 const char *text = "<doc></doc>\n\r\n"; 4077 XML_Char trigger_char = XCS('\r'); 4078 4079 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 4080 XML_SetUserData(g_parser, &trigger_char); 4081 g_resumable = XML_FALSE; 4082 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4083 != XML_STATUS_ERROR) 4084 fail("Abort not triggered"); 4085 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 4086 xml_failure(g_parser); 4087 } 4088 END_TEST 4089 4090 /* Test a different code path for abort in the epilog */ 4091 START_TEST(test_abort_epilog_2) { 4092 const char *text = "<doc></doc>\n"; 4093 XML_Char trigger_char = XCS('\n'); 4094 4095 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 4096 XML_SetUserData(g_parser, &trigger_char); 4097 g_resumable = XML_FALSE; 4098 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); 4099 } 4100 END_TEST 4101 4102 /* Test suspension from the epilog */ 4103 START_TEST(test_suspend_epilog) { 4104 const char *text = "<doc></doc>\n"; 4105 XML_Char trigger_char = XCS('\n'); 4106 4107 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 4108 XML_SetUserData(g_parser, &trigger_char); 4109 g_resumable = XML_TRUE; 4110 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4111 != 
XML_STATUS_SUSPENDED) 4112 xml_failure(g_parser); 4113 } 4114 END_TEST 4115 4116 START_TEST(test_suspend_in_sole_empty_tag) { 4117 const char *text = "<doc/>"; 4118 enum XML_Status rc; 4119 4120 XML_SetEndElementHandler(g_parser, suspending_end_handler); 4121 XML_SetUserData(g_parser, g_parser); 4122 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 4123 if (rc == XML_STATUS_ERROR) 4124 xml_failure(g_parser); 4125 else if (rc != XML_STATUS_SUSPENDED) 4126 fail("Suspend not triggered"); 4127 rc = XML_ResumeParser(g_parser); 4128 if (rc == XML_STATUS_ERROR) 4129 xml_failure(g_parser); 4130 else if (rc != XML_STATUS_OK) 4131 fail("Resume failed"); 4132 } 4133 END_TEST 4134 4135 START_TEST(test_unfinished_epilog) { 4136 const char *text = "<doc></doc><"; 4137 4138 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, 4139 "Incomplete epilog entry not faulted"); 4140 } 4141 END_TEST 4142 4143 START_TEST(test_partial_char_in_epilog) { 4144 const char *text = "<doc></doc>\xe2\x82"; 4145 4146 /* First check that no fault is raised if the parse is not finished */ 4147 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 4148 == XML_STATUS_ERROR) 4149 xml_failure(g_parser); 4150 /* Now check that it is faulted once we finish */ 4151 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR) 4152 fail("Partial character in epilog not faulted"); 4153 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR) 4154 xml_failure(g_parser); 4155 } 4156 END_TEST 4157 4158 /* Test resuming a parse suspended in entity substitution */ 4159 START_TEST(test_suspend_resume_internal_entity) { 4160 const char *text 4161 = "<!DOCTYPE doc [\n" 4162 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n" 4163 "]>\n" 4164 "<doc>&foo;</doc>\n"; 4165 const XML_Char *expected1 = XCS("Hi"); 4166 const XML_Char *expected2 = XCS("HiHo"); 4167 CharData storage; 4168 4169 CharData_Init(&storage); 4170 XML_SetStartElementHandler(g_parser, 
start_element_suspender); 4171 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 4172 XML_SetUserData(g_parser, &storage); 4173 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 4174 // we won't know exactly how much input we actually managed to give Expat. 4175 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4176 != XML_STATUS_SUSPENDED) 4177 xml_failure(g_parser); 4178 CharData_CheckXMLChars(&storage, XCS("")); 4179 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 4180 xml_failure(g_parser); 4181 CharData_CheckXMLChars(&storage, expected1); 4182 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 4183 xml_failure(g_parser); 4184 CharData_CheckXMLChars(&storage, expected2); 4185 } 4186 END_TEST 4187 4188 START_TEST(test_suspend_resume_internal_entity_issue_629) { 4189 const char *const text 4190 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n" 4191 "<" 4192 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4193 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4194 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4195 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4196 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4197 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4198 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4199 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4200 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4201 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4202 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4203 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4204 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4205 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4206 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4207 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4208 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4209 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4210 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4211 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4212 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4213 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4214 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4215 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4216 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4217 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4218 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4219 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4220 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4221 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4222 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4223 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4224 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4225 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4226 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4227 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4228 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4229 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4230 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4231 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4232 "/>" 4233 "</b></a>"; 4234 const size_t firstChunkSizeBytes = 54; 4235 4236 XML_Parser parser = XML_ParserCreate(NULL); 4237 XML_SetUserData(parser, parser); 4238 XML_SetCommentHandler(parser, suspending_comment_handler); 4239 4240 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE) 4241 != XML_STATUS_SUSPENDED) 4242 xml_failure(parser); 4243 if (XML_ResumeParser(parser) != XML_STATUS_OK) 4244 xml_failure(parser); 4245 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes, 4246 (int)(strlen(text) - firstChunkSizeBytes), 4247 XML_TRUE) 4248 != XML_STATUS_OK) 4249 xml_failure(parser); 4250 XML_ParserFree(parser); 4251 } 4252 END_TEST 4253 4254 /* Test syntax error is caught at parse resumption 
*/ 4255 START_TEST(test_resume_entity_with_syntax_error) { 4256 if (g_chunkSize != 0) { 4257 // this test does not use SINGLE_BYTES, because of suspension 4258 return; 4259 } 4260 4261 const char *text = "<!DOCTYPE doc [\n" 4262 "<!ENTITY foo '<suspend>Hi</wombat>'>\n" 4263 "]>\n" 4264 "<doc>&foo;</doc>\n"; 4265 4266 XML_SetStartElementHandler(g_parser, start_element_suspender); 4267 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 4268 // we won't know exactly how much input we actually managed to give Expat. 4269 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4270 != XML_STATUS_SUSPENDED) 4271 xml_failure(g_parser); 4272 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) 4273 fail("Syntax error in entity not faulted"); 4274 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH) 4275 xml_failure(g_parser); 4276 } 4277 END_TEST 4278 4279 /* Test suspending and resuming in a parameter entity substitution */ 4280 START_TEST(test_suspend_resume_parameter_entity) { 4281 const char *text = "<!DOCTYPE doc [\n" 4282 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n" 4283 "%foo;\n" 4284 "]>\n" 4285 "<doc>Hello, world</doc>"; 4286 const XML_Char *expected = XCS("Hello, world"); 4287 CharData storage; 4288 4289 CharData_Init(&storage); 4290 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4291 XML_SetElementDeclHandler(g_parser, element_decl_suspender); 4292 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 4293 XML_SetUserData(g_parser, &storage); 4294 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4295 != XML_STATUS_SUSPENDED) 4296 xml_failure(g_parser); 4297 CharData_CheckXMLChars(&storage, XCS("")); 4298 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 4299 xml_failure(g_parser); 4300 CharData_CheckXMLChars(&storage, expected); 4301 } 4302 END_TEST 4303 4304 /* Test attempting to use parser after an error is faulted */ 4305 START_TEST(test_restart_on_error) { 4306 const char *text = 
"<$doc><doc></doc>"; 4307 4308 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4309 != XML_STATUS_ERROR) 4310 fail("Invalid tag name not faulted"); 4311 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 4312 xml_failure(g_parser); 4313 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 4314 fail("Restarting invalid parse not faulted"); 4315 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 4316 xml_failure(g_parser); 4317 } 4318 END_TEST 4319 4320 /* Test that angle brackets in an attribute default value are faulted */ 4321 START_TEST(test_reject_lt_in_attribute_value) { 4322 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n" 4323 "<doc></doc>"; 4324 4325 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4326 "Bad attribute default not faulted"); 4327 } 4328 END_TEST 4329 4330 START_TEST(test_reject_unfinished_param_in_att_value) { 4331 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n" 4332 "<doc></doc>"; 4333 4334 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4335 "Bad attribute default not faulted"); 4336 } 4337 END_TEST 4338 4339 START_TEST(test_trailing_cr_in_att_value) { 4340 const char *text = "<doc a='value\r'/>"; 4341 4342 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4343 == XML_STATUS_ERROR) 4344 xml_failure(g_parser); 4345 } 4346 END_TEST 4347 4348 /* Try parsing a general entity within a parameter entity in a 4349 * standalone internal DTD. Covers a corner case in the parser. 
4350 */ 4351 START_TEST(test_standalone_internal_entity) { 4352 const char *text = "<?xml version='1.0' standalone='yes' ?>\n" 4353 "<!DOCTYPE doc [\n" 4354 " <!ELEMENT doc (#PCDATA)>\n" 4355 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n" 4356 " <!ENTITY ge 'AttDefaultValue'>\n" 4357 " %pe;\n" 4358 "]>\n" 4359 "<doc att2='any'/>"; 4360 4361 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4362 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4363 == XML_STATUS_ERROR) 4364 xml_failure(g_parser); 4365 } 4366 END_TEST 4367 4368 /* Test that a reference to an unknown external entity is skipped */ 4369 START_TEST(test_skipped_external_entity) { 4370 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 4371 "<doc></doc>\n"; 4372 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n" 4373 "<!ENTITY % e2 '%e1;'>\n", 4374 NULL, NULL}; 4375 4376 XML_SetUserData(g_parser, &test_data); 4377 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4378 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4379 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4380 == XML_STATUS_ERROR) 4381 xml_failure(g_parser); 4382 } 4383 END_TEST 4384 4385 START_TEST(test_scaff_index_shared_across_external_entity_parser) { 4386 const char text[] 4387 = "<!DOCTYPE doc [\n" 4388 "<!ELEMENT a " 4389 "((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((b))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))>\n" 4390 "<!ENTITY % e SYSTEM 'ext'>\n" 4391 "%e;\n" 4392 "<!ELEMENT c " 4393 "(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((d)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))>\n" 4394 "]>\n" 4395 "<doc/>"; 4396 ExtOption options[] 4397 = {{XCS("ext"), 4398 "<!ELEMENT x " 4399 "((((((((((((((((((((((((((((((((y))))))))))))))))))))))))))))))))>"}, 4400 {NULL, NULL}}; 4401 4402 XML_Parser parser = XML_ParserCreate(NULL); 4403 
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4404 XML_SetUserData(parser, options); 4405 XML_SetExternalEntityRefHandler(parser, external_entity_optioner); 4406 XML_SetElementDeclHandler(parser, dummy_element_decl_handler); 4407 4408 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 4409 == XML_STATUS_ERROR) 4410 xml_failure(parser); 4411 4412 XML_ParserFree(parser); 4413 } 4414 END_TEST 4415 4416 /* Test a different form of unknown external entity */ 4417 START_TEST(test_skipped_null_loaded_ext_entity) { 4418 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 4419 "<doc />"; 4420 ExtHdlrData test_data 4421 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 4422 "<!ENTITY % pe2 '%pe1;'>\n" 4423 "%pe2;\n", 4424 external_entity_null_loader, NULL}; 4425 4426 XML_SetUserData(g_parser, &test_data); 4427 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4428 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 4429 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4430 == XML_STATUS_ERROR) 4431 xml_failure(g_parser); 4432 } 4433 END_TEST 4434 4435 START_TEST(test_skipped_unloaded_ext_entity) { 4436 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 4437 "<doc />"; 4438 ExtHdlrData test_data 4439 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 4440 "<!ENTITY % pe2 '%pe1;'>\n" 4441 "%pe2;\n", 4442 NULL, NULL}; 4443 4444 XML_SetUserData(g_parser, &test_data); 4445 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4446 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 4447 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4448 == XML_STATUS_ERROR) 4449 xml_failure(g_parser); 4450 } 4451 END_TEST 4452 4453 /* Test that a parameter entity value ending with a carriage return 4454 * has it translated internally into a newline. 
4455 */ 4456 START_TEST(test_param_entity_with_trailing_cr) { 4457 #define PARAM_ENTITY_NAME "pe" 4458 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">" 4459 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 4460 "<doc/>"; 4461 ExtTest test_data 4462 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n" 4463 "%" PARAM_ENTITY_NAME ";\n", 4464 NULL, NULL}; 4465 4466 XML_SetUserData(g_parser, &test_data); 4467 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4468 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4469 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler); 4470 param_entity_match_init(XCS(PARAM_ENTITY_NAME), 4471 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n")); 4472 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4473 == XML_STATUS_ERROR) 4474 xml_failure(g_parser); 4475 int entity_match_flag = get_param_entity_match_flag(); 4476 if (entity_match_flag == ENTITY_MATCH_FAIL) 4477 fail("Parameter entity CR->NEWLINE conversion failed"); 4478 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) 4479 fail("Parameter entity not parsed"); 4480 } 4481 #undef PARAM_ENTITY_NAME 4482 #undef PARAM_ENTITY_CORE_VALUE 4483 END_TEST 4484 4485 START_TEST(test_invalid_character_entity) { 4486 const char *text = "<!DOCTYPE doc [\n" 4487 " <!ENTITY entity '�'>\n" 4488 "]>\n" 4489 "<doc>&entity;</doc>"; 4490 4491 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4492 "Out of range character reference not faulted"); 4493 } 4494 END_TEST 4495 4496 START_TEST(test_invalid_character_entity_2) { 4497 const char *text = "<!DOCTYPE doc [\n" 4498 " <!ENTITY entity '&#xg0;'>\n" 4499 "]>\n" 4500 "<doc>&entity;</doc>"; 4501 4502 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4503 "Out of range character reference not faulted"); 4504 } 4505 END_TEST 4506 4507 START_TEST(test_invalid_character_entity_3) { 4508 const char text[] = 4509 /* <!DOCTYPE doc [\n */ 4510 
"\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 4511 /* U+0E04 = KHO KHWAI 4512 * U+0E08 = CHO CHAN */ 4513 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */ 4514 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " 4515 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" 4516 /* ]>\n */ 4517 "\0]\0>\0\n" 4518 /* <doc>&entity;</doc> */ 4519 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; 4520 4521 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4522 != XML_STATUS_ERROR) 4523 fail("Invalid start of entity name not faulted"); 4524 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY) 4525 xml_failure(g_parser); 4526 } 4527 END_TEST 4528 4529 START_TEST(test_invalid_character_entity_4) { 4530 const char *text = "<!DOCTYPE doc [\n" 4531 " <!ENTITY entity '�'>\n" /* = � */ 4532 "]>\n" 4533 "<doc>&entity;</doc>"; 4534 4535 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4536 "Out of range character reference not faulted"); 4537 } 4538 END_TEST 4539 4540 /* Test that processing instructions are picked up by a default handler */ 4541 START_TEST(test_pi_handled_in_default) { 4542 const char *text = "<?test processing instruction?>\n<doc/>"; 4543 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>"); 4544 CharData storage; 4545 4546 CharData_Init(&storage); 4547 XML_SetDefaultHandler(g_parser, accumulate_characters); 4548 XML_SetUserData(g_parser, &storage); 4549 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4550 == XML_STATUS_ERROR) 4551 xml_failure(g_parser); 4552 CharData_CheckXMLChars(&storage, expected); 4553 } 4554 END_TEST 4555 4556 /* Test that comments are picked up by a default handler */ 4557 START_TEST(test_comment_handled_in_default) { 4558 const char *text = "<!-- This is a comment -->\n<doc/>"; 4559 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>"); 4560 CharData storage; 4561 4562 CharData_Init(&storage); 4563 XML_SetDefaultHandler(g_parser, accumulate_characters); 
4564 XML_SetUserData(g_parser, &storage); 4565 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4566 == XML_STATUS_ERROR) 4567 xml_failure(g_parser); 4568 CharData_CheckXMLChars(&storage, expected); 4569 } 4570 END_TEST 4571 4572 /* Test PIs that look almost but not quite like XML declarations */ 4573 START_TEST(test_pi_yml) { 4574 const char *text = "<?yml something like data?><doc/>"; 4575 const XML_Char *expected = XCS("yml: something like data\n"); 4576 CharData storage; 4577 4578 CharData_Init(&storage); 4579 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4580 XML_SetUserData(g_parser, &storage); 4581 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4582 == XML_STATUS_ERROR) 4583 xml_failure(g_parser); 4584 CharData_CheckXMLChars(&storage, expected); 4585 } 4586 END_TEST 4587 4588 START_TEST(test_pi_xnl) { 4589 const char *text = "<?xnl nothing like data?><doc/>"; 4590 const XML_Char *expected = XCS("xnl: nothing like data\n"); 4591 CharData storage; 4592 4593 CharData_Init(&storage); 4594 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4595 XML_SetUserData(g_parser, &storage); 4596 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4597 == XML_STATUS_ERROR) 4598 xml_failure(g_parser); 4599 CharData_CheckXMLChars(&storage, expected); 4600 } 4601 END_TEST 4602 4603 START_TEST(test_pi_xmm) { 4604 const char *text = "<?xmm everything like data?><doc/>"; 4605 const XML_Char *expected = XCS("xmm: everything like data\n"); 4606 CharData storage; 4607 4608 CharData_Init(&storage); 4609 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4610 XML_SetUserData(g_parser, &storage); 4611 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4612 == XML_STATUS_ERROR) 4613 xml_failure(g_parser); 4614 CharData_CheckXMLChars(&storage, expected); 4615 } 4616 END_TEST 4617 4618 START_TEST(test_utf16_pi) { 4619 const 
char text[] = 4620 /* <?{KHO KHWAI}{CHO CHAN}?> 4621 * where {KHO KHWAI} = U+0E04 4622 * and {CHO CHAN} = U+0E08 4623 */ 4624 "<\0?\0\x04\x0e\x08\x0e?\0>\0" 4625 /* <q/> */ 4626 "<\0q\0/\0>\0"; 4627 #ifdef XML_UNICODE 4628 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4629 #else 4630 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4631 #endif 4632 CharData storage; 4633 4634 CharData_Init(&storage); 4635 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4636 XML_SetUserData(g_parser, &storage); 4637 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4638 == XML_STATUS_ERROR) 4639 xml_failure(g_parser); 4640 CharData_CheckXMLChars(&storage, expected); 4641 } 4642 END_TEST 4643 4644 START_TEST(test_utf16_be_pi) { 4645 const char text[] = 4646 /* <?{KHO KHWAI}{CHO CHAN}?> 4647 * where {KHO KHWAI} = U+0E04 4648 * and {CHO CHAN} = U+0E08 4649 */ 4650 "\0<\0?\x0e\x04\x0e\x08\0?\0>" 4651 /* <q/> */ 4652 "\0<\0q\0/\0>"; 4653 #ifdef XML_UNICODE 4654 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4655 #else 4656 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4657 #endif 4658 CharData storage; 4659 4660 CharData_Init(&storage); 4661 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4662 XML_SetUserData(g_parser, &storage); 4663 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4664 == XML_STATUS_ERROR) 4665 xml_failure(g_parser); 4666 CharData_CheckXMLChars(&storage, expected); 4667 } 4668 END_TEST 4669 4670 /* Test that comments can be picked up and translated */ 4671 START_TEST(test_utf16_be_comment) { 4672 const char text[] = 4673 /* <!-- Comment A --> */ 4674 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" 4675 /* <doc/> */ 4676 "\0<\0d\0o\0c\0/\0>"; 4677 const XML_Char *expected = XCS(" Comment A "); 4678 CharData storage; 4679 4680 CharData_Init(&storage); 4681 XML_SetCommentHandler(g_parser, accumulate_comment); 
4682 XML_SetUserData(g_parser, &storage); 4683 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4684 == XML_STATUS_ERROR) 4685 xml_failure(g_parser); 4686 CharData_CheckXMLChars(&storage, expected); 4687 } 4688 END_TEST 4689 4690 START_TEST(test_utf16_le_comment) { 4691 const char text[] = 4692 /* <!-- Comment B --> */ 4693 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" 4694 /* <doc/> */ 4695 "<\0d\0o\0c\0/\0>\0"; 4696 const XML_Char *expected = XCS(" Comment B "); 4697 CharData storage; 4698 4699 CharData_Init(&storage); 4700 XML_SetCommentHandler(g_parser, accumulate_comment); 4701 XML_SetUserData(g_parser, &storage); 4702 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4703 == XML_STATUS_ERROR) 4704 xml_failure(g_parser); 4705 CharData_CheckXMLChars(&storage, expected); 4706 } 4707 END_TEST 4708 4709 /* Test that the unknown encoding handler with map entries that expect 4710 * conversion but no conversion function is faulted 4711 */ 4712 START_TEST(test_missing_encoding_conversion_fn) { 4713 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n" 4714 "<doc>\x81</doc>"; 4715 4716 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4717 /* MiscEncodingHandler sets up an encoding with every top-bit-set 4718 * character introducing a two-byte sequence. For this, it 4719 * requires a convert function. The above function call doesn't 4720 * pass one through, so when BadEncodingHandler actually gets 4721 * called it should supply an invalid encoding. 
4722 */ 4723 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4724 "Encoding with missing convert() not faulted"); 4725 } 4726 END_TEST 4727 4728 START_TEST(test_failing_encoding_conversion_fn) { 4729 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n" 4730 "<doc>\x81</doc>"; 4731 4732 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4733 /* BadEncodingHandler sets up an encoding with every top-bit-set 4734 * character introducing a two-byte sequence. For this, it 4735 * requires a convert function. The above function call passes 4736 * one that insists all possible sequences are invalid anyway. 4737 */ 4738 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4739 "Encoding with failing convert() not faulted"); 4740 } 4741 END_TEST 4742 4743 /* Test unknown encoding conversions */ 4744 START_TEST(test_unknown_encoding_success) { 4745 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4746 /* Equivalent to <eoc>Hello, world</eoc> */ 4747 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>"; 4748 4749 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4750 run_character_check(text, XCS("Hello, world")); 4751 } 4752 END_TEST 4753 4754 /* Test bad name character in unknown encoding */ 4755 START_TEST(test_unknown_encoding_bad_name) { 4756 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4757 "<\xff\x64oc>Hello, world</\xff\x64oc>"; 4758 4759 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4760 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4761 "Bad name start in unknown encoding not faulted"); 4762 } 4763 END_TEST 4764 4765 /* Test bad mid-name character in unknown encoding */ 4766 START_TEST(test_unknown_encoding_bad_name_2) { 4767 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4768 "<d\xffoc>Hello, world</d\xffoc>"; 4769 4770 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4771 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4772 
"Bad name in unknown encoding not faulted"); 4773 } 4774 END_TEST 4775 4776 /* Test element name that is long enough to fill the conversion buffer 4777 * in an unknown encoding, finishing with an encoded character. 4778 */ 4779 START_TEST(test_unknown_encoding_long_name_1) { 4780 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4781 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>" 4782 "Hi" 4783 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"; 4784 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4785 CharData storage; 4786 4787 CharData_Init(&storage); 4788 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4789 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4790 XML_SetUserData(g_parser, &storage); 4791 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4792 == XML_STATUS_ERROR) 4793 xml_failure(g_parser); 4794 CharData_CheckXMLChars(&storage, expected); 4795 } 4796 END_TEST 4797 4798 /* Test element name that is long enough to fill the conversion buffer 4799 * in an unknown encoding, finishing with an simple character. 
4800 */ 4801 START_TEST(test_unknown_encoding_long_name_2) { 4802 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4803 "<abcdefghabcdefghabcdefghijklmnop>" 4804 "Hi" 4805 "</abcdefghabcdefghabcdefghijklmnop>"; 4806 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4807 CharData storage; 4808 4809 CharData_Init(&storage); 4810 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4811 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4812 XML_SetUserData(g_parser, &storage); 4813 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4814 == XML_STATUS_ERROR) 4815 xml_failure(g_parser); 4816 CharData_CheckXMLChars(&storage, expected); 4817 } 4818 END_TEST 4819 4820 START_TEST(test_invalid_unknown_encoding) { 4821 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n" 4822 "<doc>Hello world</doc>"; 4823 4824 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4825 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4826 "Invalid unknown encoding not faulted"); 4827 } 4828 END_TEST 4829 4830 START_TEST(test_unknown_ascii_encoding_ok) { 4831 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4832 "<doc>Hello, world</doc>"; 4833 4834 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4835 run_character_check(text, XCS("Hello, world")); 4836 } 4837 END_TEST 4838 4839 START_TEST(test_unknown_ascii_encoding_fail) { 4840 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4841 "<doc>Hello, \x80 world</doc>"; 4842 4843 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4844 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4845 "Invalid character not faulted"); 4846 } 4847 END_TEST 4848 4849 START_TEST(test_unknown_encoding_invalid_length) { 4850 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n" 4851 "<doc>Hello, world</doc>"; 4852 4853 XML_SetUnknownEncodingHandler(g_parser, 
MiscEncodingHandler, NULL); 4854 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4855 "Invalid unknown encoding not faulted"); 4856 } 4857 END_TEST 4858 4859 START_TEST(test_unknown_encoding_invalid_topbit) { 4860 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n" 4861 "<doc>Hello, world</doc>"; 4862 4863 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4864 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4865 "Invalid unknown encoding not faulted"); 4866 } 4867 END_TEST 4868 4869 START_TEST(test_unknown_encoding_invalid_surrogate) { 4870 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n" 4871 "<doc>Hello, \x82 world</doc>"; 4872 4873 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4874 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4875 "Invalid unknown encoding not faulted"); 4876 } 4877 END_TEST 4878 4879 START_TEST(test_unknown_encoding_invalid_high) { 4880 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n" 4881 "<doc>Hello, world</doc>"; 4882 4883 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4884 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4885 "Invalid unknown encoding not faulted"); 4886 } 4887 END_TEST 4888 4889 START_TEST(test_unknown_encoding_invalid_attr_value) { 4890 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4891 "<doc attr='\xff\x30'/>"; 4892 4893 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4894 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4895 "Invalid attribute valid not faulted"); 4896 } 4897 END_TEST 4898 4899 START_TEST(test_unknown_encoding_user_data_primary) { 4900 // This test is based on ideas contributed by Artiphishell Inc. 
4901 const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n" 4902 "<root />\n"; 4903 XML_Parser parser = XML_ParserCreate(NULL); 4904 XML_SetUnknownEncodingHandler(parser, 4905 user_data_checking_unknown_encoding_handler, 4906 (void *)(intptr_t)0xC0FFEE); 4907 4908 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 4909 == XML_STATUS_OK); 4910 4911 XML_ParserFree(parser); 4912 } 4913 END_TEST 4914 4915 START_TEST(test_unknown_encoding_user_data_secondary) { 4916 // This test is based on ideas contributed by Artiphishell Inc. 4917 const char *const text_main = "<!DOCTYPE r [\n" 4918 " <!ENTITY ext SYSTEM 'ext.ent'>\n" 4919 "]>\n" 4920 "<r>&ext;</r>\n"; 4921 const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n" 4922 "<e>data</e>"; 4923 ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL}; 4924 XML_Parser parser = XML_ParserCreate(NULL); 4925 XML_SetExternalEntityRefHandler(parser, external_entity_loader2); 4926 XML_SetUnknownEncodingHandler(parser, 4927 user_data_checking_unknown_encoding_handler, 4928 (void *)(intptr_t)0xC0FFEE); 4929 XML_SetUserData(parser, &test_data); 4930 4931 assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main), 4932 XML_TRUE) 4933 == XML_STATUS_OK); 4934 4935 XML_ParserFree(parser); 4936 } 4937 END_TEST 4938 4939 /* Test an external entity parser set to use latin-1 detects UTF-16 4940 * BOMs correctly. 
4941 */ 4942 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ 4943 START_TEST(test_ext_entity_latin1_utf16le_bom) { 4944 const char *text = "<!DOCTYPE doc [\n" 4945 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4946 "]>\n" 4947 "<doc>&en;</doc>"; 4948 ExtTest2 test_data 4949 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4950 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4951 * 0x4c = L and 0x20 is a space 4952 */ 4953 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4954 #ifdef XML_UNICODE 4955 const XML_Char *expected = XCS("\x00ff\x00feL "); 4956 #else 4957 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4958 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4959 #endif 4960 CharData storage; 4961 4962 CharData_Init(&storage); 4963 test_data.storage = &storage; 4964 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4965 XML_SetUserData(g_parser, &test_data); 4966 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4967 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4968 == XML_STATUS_ERROR) 4969 xml_failure(g_parser); 4970 CharData_CheckXMLChars(&storage, expected); 4971 } 4972 END_TEST 4973 4974 START_TEST(test_ext_entity_latin1_utf16be_bom) { 4975 const char *text = "<!DOCTYPE doc [\n" 4976 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4977 "]>\n" 4978 "<doc>&en;</doc>"; 4979 ExtTest2 test_data 4980 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4981 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4982 * 0x4c = L and 0x20 is a space 4983 */ 4984 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4985 #ifdef XML_UNICODE 4986 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4987 #else 4988 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4989 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L"); 4990 #endif 4991 CharData storage; 4992 4993 
CharData_Init(&storage); 4994 test_data.storage = &storage; 4995 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4996 XML_SetUserData(g_parser, &test_data); 4997 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4998 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4999 == XML_STATUS_ERROR) 5000 xml_failure(g_parser); 5001 CharData_CheckXMLChars(&storage, expected); 5002 } 5003 END_TEST 5004 5005 /* Parsing the full buffer rather than a byte at a time makes a 5006 * difference to the encoding scanning code, so repeat the above tests 5007 * without breaking them down by byte. 5008 */ 5009 START_TEST(test_ext_entity_latin1_utf16le_bom2) { 5010 const char *text = "<!DOCTYPE doc [\n" 5011 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 5012 "]>\n" 5013 "<doc>&en;</doc>"; 5014 ExtTest2 test_data 5015 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 5016 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 5017 * 0x4c = L and 0x20 is a space 5018 */ 5019 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 5020 #ifdef XML_UNICODE 5021 const XML_Char *expected = XCS("\x00ff\x00feL "); 5022 #else 5023 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 5024 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 5025 #endif 5026 CharData storage; 5027 5028 CharData_Init(&storage); 5029 test_data.storage = &storage; 5030 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5031 XML_SetUserData(g_parser, &test_data); 5032 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5033 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5034 == XML_STATUS_ERROR) 5035 xml_failure(g_parser); 5036 CharData_CheckXMLChars(&storage, expected); 5037 } 5038 END_TEST 5039 5040 START_TEST(test_ext_entity_latin1_utf16be_bom2) { 5041 const char *text = "<!DOCTYPE doc [\n" 5042 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 5043 
"]>\n" 5044 "<doc>&en;</doc>"; 5045 ExtTest2 test_data 5046 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 5047 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 5048 * 0x4c = L and 0x20 is a space 5049 */ 5050 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 5051 #ifdef XML_UNICODE 5052 const XML_Char *expected = XCS("\x00fe\x00ff L"); 5053 #else 5054 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 5055 const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; 5056 #endif 5057 CharData storage; 5058 5059 CharData_Init(&storage); 5060 test_data.storage = &storage; 5061 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5062 XML_SetUserData(g_parser, &test_data); 5063 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5064 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5065 == XML_STATUS_ERROR) 5066 xml_failure(g_parser); 5067 CharData_CheckXMLChars(&storage, expected); 5068 } 5069 END_TEST 5070 5071 /* Test little-endian UTF-16 given an explicit big-endian encoding */ 5072 START_TEST(test_ext_entity_utf16_be) { 5073 const char *text = "<!DOCTYPE doc [\n" 5074 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 5075 "]>\n" 5076 "<doc>&en;</doc>"; 5077 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL}; 5078 #ifdef XML_UNICODE 5079 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 5080 #else 5081 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 5082 "\xe6\x94\x80" /* U+6500 */ 5083 "\xe2\xbc\x80" /* U+2F00 */ 5084 "\xe3\xb8\x80"); /* U+3E00 */ 5085 #endif 5086 CharData storage; 5087 5088 CharData_Init(&storage); 5089 test_data.storage = &storage; 5090 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5091 XML_SetUserData(g_parser, &test_data); 5092 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5093 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5094 == 
XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le) {
  const char *text = "<!DOCTYPE doc [\n"
                     " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";
  /* Entity payload: the 8 bytes of "<e/>" with the zero byte first in each
   * pair, handed over with the encoding name "utf-16le".  The last field is
   * filled in with the storage below.
   */
  ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
  /* Expected character data: each byte pair read with the opposite byte
   * order, i.e. U+3C00 U+6500 U+2F00 U+3E00.
   */
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
#else
  const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
                                 "\xe6\x94\x80" /* U+6500 */
                                 "\xe2\xbc\x80" /* U+2F00 */
                                 "\xe3\xb8\x80"); /* U+3E00 */
#endif
  CharData storage;

  CharData_Init(&storage);
  test_data.storage = &storage;
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  XML_SetUserData(g_parser, &test_data);
  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test little-endian UTF-16 given no explicit encoding.
 * The existing default encoding (UTF-8) is assumed to hold without a
 * BOM to contradict it, so the entity value will in fact provoke an
 * error because 0x00 is not a valid XML character. We parse the
 * whole buffer in one go rather than feeding it in byte by byte to
 * exercise different code paths in the initial scanning routines.
 */
START_TEST(test_ext_entity_utf16_unknown) {
  const char *text = "<!DOCTYPE doc [\n"
                     " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";
  /* Six bytes "a\0b\0c\0"; the fault record carries
   * XML_ERROR_INVALID_TOKEN for the external entity.
   */
  ExtFaults2 test_data
      = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
         XML_ERROR_INVALID_TOKEN};

  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
  XML_SetUserData(g_parser, &test_data);
  /* The outer parse is expected to fail with an external-entity error. */
  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                 "Invalid character should not have been accepted");
}
END_TEST

/* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom) {
  const char *text = "<!DOCTYPE doc [\n"
                     " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";
  /* Shares the first two bytes with the BOM but ends in 0x80, not 0xBF;
   * no encoding name is supplied.
   */
  ExtTest2 test_data
      = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
         3, NULL, NULL};
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\xfec0");
#else
  const XML_Char *expected = XCS("\xef\xbb\x80");
#endif
  CharData storage;

  CharData_Init(&storage);
  test_data.storage = &storage;
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  XML_SetUserData(g_parser, &test_data);
  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section) {
  const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("one \x00e9 two");
#else
  const XML_Char *expected = XCS("one \xc3\xa9 two");
#endif

  run_character_check(text, expected);
}
END_TEST

/* Test that UTF-8 in a CDATA section is correctly passed through when the
 * section also contains a lone ']' between the multi-byte characters
 */
START_TEST(test_utf8_in_cdata_section_2) {
  const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x00e9]\x00e9two");
#else
  const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
#endif

  run_character_check(text, expected);
}
END_TEST

/* Test which UTF-8 byte sequences are accepted in element names, both as
 * the first character of the name and after a leading 'a'
 */
START_TEST(test_utf8_in_start_tags) {
  struct test_case {
    bool goodName;      /* expected to parse when placed after "a" */
    bool goodNameStart; /* expected to parse as first character of the name */
    const char *tagName;
  };

  // The idea with the tests below is this:
  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
  // go to isNever and are hence not a concern.
  //
  // We start with a character that is a valid name character
  // (or even name-start character, see XML 1.0r4 spec) and then we flip
  // single bits at places where (1) the result leaves the UTF-8 encoding space
  // and (2) we stay in the same n-byte sequence family.
  //
  // The flipped bits are highlighted in angle brackets in comments,
  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
  // the most significant bit to 1 to leave UTF-8 encoding space.
  struct test_case cases[] = {
      // 1-byte UTF-8: [0xxx xxxx]
      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
      {false, false, "\xBA"}, // [<1>011 1010]
      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
      {false, false, "\xB9"}, // [<1>011 1001]

      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
                                  // Arabic small waw U+06E5
      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
                                  // combining char U+0301
      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]

      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
                                      // Devanagari Letter A U+0905
      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
                                      // combining char U+0901
      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
  };
  const bool atNameStart[] = {true, false};

  size_t i = 0;
  char doc[1024];
  size_t failCount = 0;

  // we need all the bytes to be parsed, but we don't want the errors that can
  // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
  if (g_reparseDeferralEnabledDefault) {
    return;
  }

  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
    size_t j = 0;
    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
      const bool expectedSuccess
          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
      // Build "<NAME><!--" or "<aNAME><!--"; the input is never finalized.
      snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
               cases[i].tagName);
      XML_Parser parser = XML_ParserCreate(NULL);

      const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
          parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);

      // A case passes when the status matches the expectation and any
      // error reported is XML_ERROR_INVALID_TOKEN.
      bool success = true;
      if ((status == XML_STATUS_OK) != expectedSuccess) {
        success = false;
      }
      if ((status == XML_STATUS_ERROR)
          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
        success = false;
      }

      if (! success) {
        fprintf(
            stderr,
            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
            (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
        failCount++;
      }

      XML_ParserFree(parser);
    }
  }

  // Report once at the end so that every failing case gets logged first.
  if (failCount > 0) {
    fail("UTF-8 regression detected");
  }
}
END_TEST

/* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements) {
  const char *text = "<doc >Hi</doc >";
  const XML_Char *expected = XCS("doc/doc");
  CharData storage;

  CharData_Init(&storage);
  XML_SetElementHandler(g_parser, record_element_start_handler,
                        record_element_end_handler);
  XML_SetUserData(g_parser, &storage);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test an attribute whose name has non-zero high bytes in a document
 * written as 16-bit units with the low byte first
 */
START_TEST(test_utf16_attribute) {
  const char text[] =
      /* <d {KHO KHWAI}{CHO CHAN}='a'/>
       * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
       * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
       */
      "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
  const XML_Char *expected = XCS("a");
  CharData storage;

  CharData_Init(&storage);
  XML_SetStartElementHandler(g_parser, accumulate_attribute);
  XML_SetUserData(g_parser, &storage);
  /* sizeof - 1: the text has embedded NULs, so strlen() cannot be used */
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* As test_utf16_attribute, but the non-ASCII attribute name comes second */
START_TEST(test_utf16_second_attr) {
  /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
   * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
   */
  const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
                      "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
  const XML_Char *expected = XCS("1");
  CharData storage;

  CharData_Init(&storage);
XML_SetStartElementHandler(g_parser, accumulate_attribute);
  XML_SetUserData(g_parser, &storage);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test that a '/' in the middle of a start tag's attribute list is faulted */
START_TEST(test_attr_after_solidus) {
  const char *text = "<doc attr1='a' / attr2='b'>";

  expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
}
END_TEST

/* Test a parameter entity whose name has non-zero high bytes, in a
 * document written as 16-bit units with the high byte first
 */
START_TEST(test_utf16_pe) {
  /* <!DOCTYPE doc [
   * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
   * %{KHO KHWAI}{CHO CHAN};
   * ]>
   * <doc></doc>
   *
   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
   * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
   */
  const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
                      "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
                      "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
                      "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
                      "\0%\x0e\x04\x0e\x08\0;\0\n"
                      "\0]\0>\0\n"
                      "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
  /* What accumulate_entity_decl is expected to have recorded */
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
#else
  const XML_Char *expected
      = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
#endif
  CharData storage;

  CharData_Init(&storage);
  XML_SetUserData(g_parser, &storage);
  XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
  /* sizeof - 1: the text has embedded NULs, so strlen() cannot be used */
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword) {
  const char *text = "<!DOCTYPE doc [\n"
                     " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
                     "]>\n"
                     "<doc />";

  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Bad keyword !IMPLIED not faulted");
}
END_TEST

/* Test that an invalid attribute description keyword consisting of
 * UTF-16 characters with their top bytes non-zero are correctly
 * faulted
 */
START_TEST(test_bad_attr_desc_keyword_utf16) {
  /* <!DOCTYPE d [
   * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
   * ]><d/>
   *
   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
   * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
   */
  const char text[]
      = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
        "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
        "\0#\x0e\x04\x0e\x08\0>\0\n"
        "\0]\0>\0<\0d\0/\0>";

  /* Parsing must fail, and specifically with XML_ERROR_SYNTAX */
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      != XML_STATUS_ERROR)
    fail("Invalid UTF16 attribute keyword not faulted");
  if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
    xml_failure(g_parser);
}
END_TEST

/* Test that invalid syntax in a <!DOCTYPE> is rejected.
Do this
 * using prefix-encoding (see above) to trigger specific code paths
 */
START_TEST(test_bad_doctype) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
                     "<!DOCTYPE doc [ \x80\x44 ]><doc/>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_SYNTAX,
                 "Invalid bytes in DOCTYPE not faulted");
}
END_TEST

/* Test that a malformed UTF-8 sequence in the DOCTYPE name is faulted */
START_TEST(test_bad_doctype_utf8) {
  const char *text = "<!DOCTYPE \xDB\x25"
                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Invalid UTF-8 in DOCTYPE not faulted");
}
END_TEST

/* Test that a non-name-start character inside the internal subset of a
 * document written as 16-bit units (high byte first) is faulted
 */
START_TEST(test_bad_doctype_utf16) {
  const char text[] =
      /* <!DOCTYPE doc [ \x06f2 ]><doc/>
       *
       * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
       * (name character) but not a valid letter (name start character)
       */
      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
      "\x06\xf2"
      "\0 \0]\0>\0<\0d\0o\0c\0/\0>";

  /* Parsing must fail, and specifically with XML_ERROR_SYNTAX */
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      != XML_STATUS_ERROR)
    fail("Invalid bytes in DOCTYPE not faulted");
  if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
    xml_failure(g_parser);
}
END_TEST

/* Test that "1+" is rejected as a document/element name */
START_TEST(test_bad_doctype_plus) {
  const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
                     "<1+>&foo;</1+>";

  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "'+' in document name not faulted");
}
END_TEST

/* Test that "1*" is rejected as a document/element name */
START_TEST(test_bad_doctype_star) {
  const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
                     "<1*>&foo;</1*>";

  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "'*' in document name not faulted");
}
END_TEST

/* Test that "1?" is rejected as a document/element name */
START_TEST(test_bad_doctype_query) {
  const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
                     "<1?>&foo;</1?>";

  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "'?' in document name not faulted");
}
END_TEST

/* Test that an invalid byte inside an IGNORE section of an external
 * subset is faulted when the unknown-encoding handler is in use
 */
START_TEST(test_unknown_encoding_bad_ignore) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
                     "<!DOCTYPE doc SYSTEM 'foo'>"
                     "<doc><e>&entity;</e></doc>";
  /* External subset text, failure message, encoding name and the error
   * code recorded for the external parse.
   */
  ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
                     "Invalid character not faulted", XCS("prefix-conv"),
                     XML_ERROR_INVALID_TOKEN};

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  XML_SetUserData(g_parser, &fault);
  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                 "Bad IGNORE section with unknown encoding not failed");
}
END_TEST

/* Test numeric character references inside an attribute value of a
 * document written as 16-bit units with the high byte first
 */
START_TEST(test_entity_in_utf16_be_attr) {
  const char text[] =
      /* <e a='&#228; &#x00E4;'></e>
       * i.e. the decimal reference 228 and the hexadecimal reference 00E4,
       * both of which denote U+00E4
       */
      "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
      "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x00e4 \x00e4");
#else
  const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
#endif
  CharData storage;

  CharData_Init(&storage);
  XML_SetUserData(g_parser, &storage);
  XML_SetStartElementHandler(g_parser, accumulate_attribute);
  /* sizeof - 1: the text has embedded NULs, so strlen() cannot be used */
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* As test_entity_in_utf16_be_attr, with the low byte of each unit first */
START_TEST(test_entity_in_utf16_le_attr) {
  const char text[] =
      /* <e a='&#228; &#x00E4;'></e>
       * i.e. the decimal reference 228 and the hexadecimal reference 00E4,
       * both of which denote U+00E4
       */
      "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
      "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x00e4 \x00e4");
#else
5563 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5564 #endif 5565 CharData storage; 5566 5567 CharData_Init(&storage); 5568 XML_SetUserData(g_parser, &storage); 5569 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5571 == XML_STATUS_ERROR) 5572 xml_failure(g_parser); 5573 CharData_CheckXMLChars(&storage, expected); 5574 } 5575 END_TEST 5576 5577 START_TEST(test_entity_public_utf16_be) { 5578 const char text[] = 5579 /* <!DOCTYPE d [ */ 5580 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 5581 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5582 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 " 5583 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n" 5584 /* %e; */ 5585 "\0%\0e\0;\0\n" 5586 /* ]> */ 5587 "\0]\0>\0\n" 5588 /* <d>&j;</d> */ 5589 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>"; 5590 ExtTest2 test_data 5591 = {/* <!ENTITY j 'baz'> */ 5592 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL}; 5593 const XML_Char *expected = XCS("baz"); 5594 CharData storage; 5595 5596 CharData_Init(&storage); 5597 test_data.storage = &storage; 5598 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5599 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5600 XML_SetUserData(g_parser, &test_data); 5601 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5602 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5603 == XML_STATUS_ERROR) 5604 xml_failure(g_parser); 5605 CharData_CheckXMLChars(&storage, expected); 5606 } 5607 END_TEST 5608 5609 START_TEST(test_entity_public_utf16_le) { 5610 const char text[] = 5611 /* <!DOCTYPE d [ */ 5612 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0" 5613 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5614 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0" 5615 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0" 5616 /* %e; */ 5617 
"%\0e\0;\0\n\0" 5618 /* ]> */ 5619 "]\0>\0\n\0" 5620 /* <d>&j;</d> */ 5621 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0"; 5622 ExtTest2 test_data 5623 = {/* <!ENTITY j 'baz'> */ 5624 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL}; 5625 const XML_Char *expected = XCS("baz"); 5626 CharData storage; 5627 5628 CharData_Init(&storage); 5629 test_data.storage = &storage; 5630 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5631 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5632 XML_SetUserData(g_parser, &test_data); 5633 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5634 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5635 == XML_STATUS_ERROR) 5636 xml_failure(g_parser); 5637 CharData_CheckXMLChars(&storage, expected); 5638 } 5639 END_TEST 5640 5641 /* Test that a doctype with neither an internal nor external subset is 5642 * faulted 5643 */ 5644 START_TEST(test_short_doctype) { 5645 const char *text = "<!DOCTYPE doc></doc>"; 5646 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5647 "DOCTYPE without subset not rejected"); 5648 } 5649 END_TEST 5650 5651 START_TEST(test_short_doctype_2) { 5652 const char *text = "<!DOCTYPE doc PUBLIC></doc>"; 5653 expect_failure(text, XML_ERROR_SYNTAX, 5654 "DOCTYPE without Public ID not rejected"); 5655 } 5656 END_TEST 5657 5658 START_TEST(test_short_doctype_3) { 5659 const char *text = "<!DOCTYPE doc SYSTEM></doc>"; 5660 expect_failure(text, XML_ERROR_SYNTAX, 5661 "DOCTYPE without System ID not rejected"); 5662 } 5663 END_TEST 5664 5665 START_TEST(test_long_doctype) { 5666 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>"; 5667 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected"); 5668 } 5669 END_TEST 5670 5671 START_TEST(test_bad_entity) { 5672 const char *text = "<!DOCTYPE doc [\n" 5673 " <!ENTITY foo PUBLIC>\n" 5674 "]>\n" 5675 "<doc/>"; 5676 expect_failure(text, XML_ERROR_SYNTAX, 5677 "ENTITY 
without Public ID is not rejected"); 5678 } 5679 END_TEST 5680 5681 /* Test unquoted value is faulted */ 5682 START_TEST(test_bad_entity_2) { 5683 const char *text = "<!DOCTYPE doc [\n" 5684 " <!ENTITY % foo bar>\n" 5685 "]>\n" 5686 "<doc/>"; 5687 expect_failure(text, XML_ERROR_SYNTAX, 5688 "ENTITY without Public ID is not rejected"); 5689 } 5690 END_TEST 5691 5692 START_TEST(test_bad_entity_3) { 5693 const char *text = "<!DOCTYPE doc [\n" 5694 " <!ENTITY % foo PUBLIC>\n" 5695 "]>\n" 5696 "<doc/>"; 5697 expect_failure(text, XML_ERROR_SYNTAX, 5698 "Parameter ENTITY without Public ID is not rejected"); 5699 } 5700 END_TEST 5701 5702 START_TEST(test_bad_entity_4) { 5703 const char *text = "<!DOCTYPE doc [\n" 5704 " <!ENTITY % foo SYSTEM>\n" 5705 "]>\n" 5706 "<doc/>"; 5707 expect_failure(text, XML_ERROR_SYNTAX, 5708 "Parameter ENTITY without Public ID is not rejected"); 5709 } 5710 END_TEST 5711 5712 START_TEST(test_bad_notation) { 5713 const char *text = "<!DOCTYPE doc [\n" 5714 " <!NOTATION n SYSTEM>\n" 5715 "]>\n" 5716 "<doc/>"; 5717 expect_failure(text, XML_ERROR_SYNTAX, 5718 "Notation without System ID is not rejected"); 5719 } 5720 END_TEST 5721 5722 /* Test for issue #11, wrongly suppressed default handler */ 5723 START_TEST(test_default_doctype_handler) { 5724 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n" 5725 " <!ENTITY foo 'bar'>\n" 5726 "]>\n" 5727 "<doc>&foo;</doc>"; 5728 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE}, 5729 {XCS("'test.dtd'"), 10, XML_FALSE}, 5730 {NULL, 0, XML_FALSE}}; 5731 int i; 5732 5733 XML_SetUserData(g_parser, &test_data); 5734 XML_SetDefaultHandler(g_parser, checking_default_handler); 5735 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 5736 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5737 == XML_STATUS_ERROR) 5738 xml_failure(g_parser); 5739 for (i = 0; test_data[i].expected != NULL; i++) 5740 if (! 
test_data[i].seen) 5741 fail("Default handler not run for public !DOCTYPE"); 5742 } 5743 END_TEST 5744 5745 START_TEST(test_empty_element_abort) { 5746 const char *text = "<abort/>"; 5747 5748 XML_SetStartElementHandler(g_parser, start_element_suspender); 5749 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5750 != XML_STATUS_ERROR) 5751 fail("Expected to error on abort"); 5752 } 5753 END_TEST 5754 5755 /* Regression test for GH issue #612: unfinished m_declAttributeType 5756 * allocation in ->m_tempPool can corrupt following allocation. 5757 */ 5758 START_TEST(test_pool_integrity_with_unfinished_attr) { 5759 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n" 5760 "<!DOCTYPE foo [\n" 5761 "<!ELEMENT foo ANY>\n" 5762 "<!ENTITY % entp SYSTEM \"external.dtd\">\n" 5763 "%entp;\n" 5764 "]>\n" 5765 "<a></a>\n"; 5766 const XML_Char *expected = XCS("COMMENT"); 5767 CharData storage; 5768 5769 CharData_Init(&storage); 5770 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5771 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist); 5772 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 5773 XML_SetCommentHandler(g_parser, accumulate_comment); 5774 XML_SetUserData(g_parser, &storage); 5775 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5776 == XML_STATUS_ERROR) 5777 xml_failure(g_parser); 5778 CharData_CheckXMLChars(&storage, expected); 5779 } 5780 END_TEST 5781 5782 /* Test a possible early return location in internalEntityProcessor */ 5783 START_TEST(test_entity_ref_no_elements) { 5784 const char *const text = "<!DOCTYPE foo [\n" 5785 "<!ENTITY e1 \"test\">\n" 5786 "]> <foo>&e1;"; // intentionally missing newline 5787 5788 XML_Parser parser = XML_ParserCreate(NULL); 5789 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5790 == XML_STATUS_ERROR); 5791 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS); 5792 
XML_ParserFree(parser); 5793 } 5794 END_TEST 5795 5796 /* Tests if chained entity references lead to unbounded recursion */ 5797 START_TEST(test_deep_nested_entity) { 5798 const size_t N_LINES = 60000; 5799 const size_t SIZE_PER_LINE = 50; 5800 5801 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE); 5802 if (text == NULL) { 5803 fail("malloc failed"); 5804 } 5805 5806 char *textPtr = text; 5807 5808 // Create the XML 5809 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5810 "<!DOCTYPE foo [\n" 5811 " <!ENTITY s0 'deepText'>\n"); 5812 5813 for (size_t i = 1; i < N_LINES; ++i) { 5814 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n", 5815 (long unsigned)i, (long unsigned)(i - 1)); 5816 } 5817 5818 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n", 5819 (long unsigned)(N_LINES - 1)); 5820 5821 const XML_Char *const expected = XCS("deepText"); 5822 5823 CharData storage; 5824 CharData_Init(&storage); 5825 5826 XML_Parser parser = XML_ParserCreate(NULL); 5827 5828 XML_SetCharacterDataHandler(parser, accumulate_characters); 5829 XML_SetUserData(parser, &storage); 5830 5831 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5832 == XML_STATUS_ERROR) 5833 xml_failure(parser); 5834 5835 CharData_CheckXMLChars(&storage, expected); 5836 XML_ParserFree(parser); 5837 free(text); 5838 } 5839 END_TEST 5840 5841 /* Tests if chained entity references in attributes 5842 lead to unbounded recursion */ 5843 START_TEST(test_deep_nested_attribute_entity) { 5844 const size_t N_LINES = 60000; 5845 const size_t SIZE_PER_LINE = 100; 5846 5847 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE); 5848 if (text == NULL) { 5849 fail("malloc failed"); 5850 } 5851 5852 char *textPtr = text; 5853 5854 // Create the XML 5855 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5856 "<!DOCTYPE foo [\n" 5857 " <!ENTITY s0 'deepText'>\n"); 5858 5859 for (size_t i = 1; i < N_LINES; ++i) { 5860 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu 
'&s%lu;'>\n", 5861 (long unsigned)i, (long unsigned)(i - 1)); 5862 } 5863 5864 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n", 5865 (long unsigned)(N_LINES - 1)); 5866 5867 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}}; 5868 ElementInfo info[] 5869 = {{XCS("foo"), 1, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}}; 5870 5871 XML_Parser parser = XML_ParserCreate(NULL); 5872 ParserAndElementInfo parserPlusElemenInfo = {parser, info}; 5873 5874 XML_SetStartElementHandler(parser, counting_start_element_handler); 5875 XML_SetUserData(parser, &parserPlusElemenInfo); 5876 5877 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5878 == XML_STATUS_ERROR) 5879 xml_failure(parser); 5880 5881 XML_ParserFree(parser); 5882 free(text); 5883 } 5884 END_TEST 5885 5886 START_TEST(test_deep_nested_entity_delayed_interpretation) { 5887 const size_t N_LINES = 70000; 5888 const size_t SIZE_PER_LINE = 100; 5889 5890 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE); 5891 if (text == NULL) { 5892 fail("malloc failed"); 5893 } 5894 5895 char *textPtr = text; 5896 5897 // Create the XML 5898 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5899 "<!DOCTYPE foo [\n" 5900 " <!ENTITY %% s0 'deepText'>\n"); 5901 5902 for (size_t i = 1; i < N_LINES; ++i) { 5903 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5904 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i, 5905 (long unsigned)(i - 1)); 5906 } 5907 5908 snprintf(textPtr, SIZE_PER_LINE, 5909 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n" 5910 " %%define_g;\n" 5911 "]>\n" 5912 "<foo/>\n", 5913 (long unsigned)(N_LINES - 1)); 5914 5915 XML_Parser parser = XML_ParserCreate(NULL); 5916 5917 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5918 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5919 == XML_STATUS_ERROR) 5920 xml_failure(parser); 5921 5922 XML_ParserFree(parser); 5923 free(text); 5924 } 5925 END_TEST 5926 5927 
START_TEST(test_nested_entity_suspend) {
  /* Nested general entities, each wrapping the next one in comments; the
   * comment handler accumulates the comment text (and, going by its name,
   * suspends the parser).
   */
  const char *const text = "<!DOCTYPE a [\n"
                           " <!ENTITY e1 '<!--e1-->'>\n"
                           " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
                           " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
                           "]>\n"
                           "<a><!--start-->&e3;<!--end--></a>";
  /* Comments in document order, concatenated */
  const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
      XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
  CharData storage;
  CharData_Init(&storage);
  XML_Parser parser = XML_ParserCreate(NULL);
  ParserPlusStorage parserPlusStorage = {parser, &storage};

  XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
  XML_SetUserData(parser, &parserPlusStorage);

  /* Keep resuming until the parser stops reporting a suspension */
  enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
  while (status == XML_STATUS_SUSPENDED) {
    status = XML_ResumeParser(parser);
  }
  if (status != XML_STATUS_OK)
    xml_failure(parser);

  CharData_CheckXMLChars(&storage, expected);
  XML_ParserFree(parser);
}
END_TEST

START_TEST(test_nested_entity_suspend_2) {
  /* As above, but with character data instead of comments */
  const char *const text = "<!DOCTYPE doc [\n"
                           " <!ENTITY ge1 'head1Ztail1'>\n"
                           " <!ENTITY ge2 'head2&ge1;tail2'>\n"
                           " <!ENTITY ge3 'head3&ge2;tail3'>\n"
                           "]>\n"
                           "<doc>&ge3;</doc>";
  const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
      XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
  CharData storage;
  CharData_Init(&storage);
  XML_Parser parser = XML_ParserCreate(NULL);
  ParserPlusStorage parserPlusStorage = {parser, &storage};

  XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
  XML_SetUserData(parser, &parserPlusStorage);

  /* Keep resuming until the parser stops reporting a suspension */
  enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
  while (status == XML_STATUS_SUSPENDED) {
    status = XML_ResumeParser(parser);
  }
  if (status != XML_STATUS_OK)
    xml_failure(parser);

  CharData_CheckXMLChars(&storage, expected);
  XML_ParserFree(parser);
}
END_TEST

/* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly) {
  /* Each case is "pre" + a long run of 'a' + "post" */
  const struct {
    const char *pre;
    const char *post;
  } text[] = {
      {"<a>", "</a>"},                      // assumed good, used as baseline
      {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
      {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
      {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
      {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
  };
  const int num_cases = sizeof(text) / sizeof(text[0]);
  char aaaaaa[4096];
  const int fillsize = (int)sizeof(aaaaaa);
  const int fillcount = 100;
  const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
  const unsigned max_factor = 4;
  /* Upper bound on g_bytesScanned for one case */
  const unsigned max_scanned = max_factor * approx_bytes;

  memset(aaaaaa, 'a', fillsize);

  if (! g_reparseDeferralEnabledDefault) {
    return; // heuristic is disabled; we would get O(n^2) and fail.
  }

  for (int i = 0; i < num_cases; ++i) {
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);
    enum XML_Status status;
    set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);

    // parse the start text
    g_bytesScanned = 0;
    status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
                                     (int)strlen(text[i].pre), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }

    // parse lots of 'a', failing the test early if it takes too long
    unsigned past_max_count = 0;
    for (int f = 0; f < fillcount; ++f) {
      status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
      if (status != XML_STATUS_OK) {
        xml_failure(parser);
      }
      if (g_bytesScanned > max_scanned) {
        // We're not done, and have already passed the limit -- the test will
        // definitely fail. This block allows us to save time by failing early.
        const unsigned pushed
            = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
        fprintf(
            stderr,
            "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
            f + 1, fillcount, pushed, g_bytesScanned,
            g_bytesScanned / (double)pushed, max_scanned, max_factor);
        past_max_count++;
        // We are failing, but allow a few log prints first. If we don't reach
        // a count of five, the test will fail after the loop instead.
        assert_true(past_max_count < 5);
      }
    }

    // parse the end text
    status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
                                     (int)strlen(text[i].post), XML_TRUE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }

    assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
    if (g_bytesScanned > max_scanned) {
      fprintf(
          stderr,
          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
          g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
          max_factor);
      fail("scanned too many bytes");
    }

    XML_ParserFree(parser);
  }
}
END_TEST

/* Test that XML_SetReparseDeferralEnabled changes when a large token split
 * over many XML_Parse calls is reported to the start element handler
 */
START_TEST(test_set_reparse_deferral) {
  const char *const pre = "<d>";
  const char *const start = "<x attr='";
  const char *const end = "'></x>";
  char eeeeee[100];
  const int fillsize = (int)sizeof(eeeeee);
  memset(eeeeee, 'e', fillsize);

  /* Run once with deferral disabled (0) and once with it enabled (1) */
  for (int enabled = 0; enabled <= 1; enabled += 1) {
    set_subtest("deferral=%d", enabled);

    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);
    assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
    // pre-grow the buffer to avoid reparsing due to almost-fullness
    assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);

    CharData storage;
    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetStartElementHandler(parser, start_element_event_handler);

    enum XML_Status status;
    // parse the start text
    status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }
    CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done

    // ..and the start of the token
    status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }
    CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one

    // try to parse lots of 'e', but the token isn't finished
    for (int c = 0; c < 100; ++c) {
      status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
      if (status != XML_STATUS_OK) {
        xml_failure(parser);
      }
    }
    CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one

    // end the <x> token.
    status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }

    if (enabled) {
      // In general, we may need to push more data to trigger a reparse attempt,
      // but in this test, the data is constructed to always require it.
      CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
      // 2x the token length should suffice; the +1 covers the start and end.
      for (int c = 0; c < 101; ++c) {
        status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
        if (status != XML_STATUS_OK) {
          xml_failure(parser);
        }
      }
    }
    CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done

    XML_ParserFree(parser);
  }
}
END_TEST

/* User data for element_decl_counter */
struct element_decl_data {
  XML_Parser parser; /* parser passed to XML_FreeContentModel */
  int count;         /* incremented once per handler call */
};

/* Element declaration handler: counts the call and releases the content
 * model through the parser stored in the user data.
 */
static void
element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
  UNUSED_P(name);
  struct element_decl_data *testdata = (struct element_decl_data *)userData;
  testdata->count += 1;
  XML_FreeContentModel(testdata->parser, model);
}

static int
external_inherited_parser(XML_Parser p, const XML_Char *context,
                          const XML_Char *base, const XML_Char *systemId,
                          const XML_Char *publicId) {
  UNUSED_P(base);
  UNUSED_P(systemId);
  UNUSED_P(publicId);
  const char *const pre = "<!ELEMENT document ANY>\n";
  const char *const start = "<!ELEMENT ";
  const char *const end = " ANY>\n";
  const char
*const post = "<!ELEMENT xyz ANY>\n"; 6167 const int enabled = *(int *)XML_GetUserData(p); 6168 char eeeeee[100]; 6169 char spaces[100]; 6170 const int fillsize = (int)sizeof(eeeeee); 6171 assert_true(fillsize == (int)sizeof(spaces)); 6172 memset(eeeeee, 'e', fillsize); 6173 memset(spaces, ' ', fillsize); 6174 6175 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); 6176 assert_true(parser != NULL); 6177 // pre-grow the buffer to avoid reparsing due to almost-fullness 6178 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 6179 6180 struct element_decl_data testdata; 6181 testdata.parser = parser; 6182 testdata.count = 0; 6183 XML_SetUserData(parser, &testdata); 6184 XML_SetElementDeclHandler(parser, element_decl_counter); 6185 6186 enum XML_Status status; 6187 // parse the initial text 6188 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 6189 if (status != XML_STATUS_OK) { 6190 xml_failure(parser); 6191 } 6192 assert_true(testdata.count == 1); // first element should be done 6193 6194 // ..and the start of the big token 6195 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 6196 if (status != XML_STATUS_OK) { 6197 xml_failure(parser); 6198 } 6199 assert_true(testdata.count == 1); // still just the first one 6200 6201 // try to parse lots of 'e', but the token isn't finished 6202 for (int c = 0; c < 100; ++c) { 6203 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 6204 if (status != XML_STATUS_OK) { 6205 xml_failure(parser); 6206 } 6207 } 6208 assert_true(testdata.count == 1); // *still* just the first one 6209 6210 // end the big token. 6211 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 6212 if (status != XML_STATUS_OK) { 6213 xml_failure(parser); 6214 } 6215 6216 if (enabled) { 6217 // In general, we may need to push more data to trigger a reparse attempt, 6218 // but in this test, the data is constructed to always require it. 
6219 assert_true(testdata.count == 1); // or the test is incorrect 6220 // 2x the token length should suffice; the +1 covers the start and end. 6221 for (int c = 0; c < 101; ++c) { 6222 status = XML_Parse(parser, spaces, fillsize, XML_FALSE); 6223 if (status != XML_STATUS_OK) { 6224 xml_failure(parser); 6225 } 6226 } 6227 } 6228 assert_true(testdata.count == 2); // the big token should be done 6229 6230 // parse the final text 6231 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); 6232 if (status != XML_STATUS_OK) { 6233 xml_failure(parser); 6234 } 6235 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done 6236 6237 XML_ParserFree(parser); 6238 return XML_STATUS_OK; 6239 } 6240 6241 START_TEST(test_reparse_deferral_is_inherited) { 6242 const char *const text 6243 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>"; 6244 for (int enabled = 0; enabled <= 1; ++enabled) { 6245 set_subtest("deferral=%d", enabled); 6246 6247 XML_Parser parser = XML_ParserCreate(NULL); 6248 assert_true(parser != NULL); 6249 XML_SetUserData(parser, (void *)&enabled); 6250 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 6251 // this handler creates a sub-parser and checks that its deferral behavior 6252 // is what we expected, based on the value of `enabled` (in userdata). 
6253 XML_SetExternalEntityRefHandler(parser, external_inherited_parser); 6254 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 6255 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 6256 xml_failure(parser); 6257 6258 XML_ParserFree(parser); 6259 } 6260 } 6261 END_TEST 6262 6263 START_TEST(test_set_reparse_deferral_on_null_parser) { 6264 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); 6265 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); 6266 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); 6267 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); 6268 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) 6269 == XML_FALSE); 6270 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) 6271 == XML_FALSE); 6272 } 6273 END_TEST 6274 6275 START_TEST(test_set_reparse_deferral_on_the_fly) { 6276 const char *const pre = "<d><x attr='"; 6277 const char *const end = "'></x>"; 6278 char iiiiii[100]; 6279 const int fillsize = (int)sizeof(iiiiii); 6280 memset(iiiiii, 'i', fillsize); 6281 6282 XML_Parser parser = XML_ParserCreate(NULL); 6283 assert_true(parser != NULL); 6284 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); 6285 6286 CharData storage; 6287 CharData_Init(&storage); 6288 XML_SetUserData(parser, &storage); 6289 XML_SetStartElementHandler(parser, start_element_event_handler); 6290 6291 enum XML_Status status; 6292 // parse the start text 6293 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 6294 if (status != XML_STATUS_OK) { 6295 xml_failure(parser); 6296 } 6297 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 6298 6299 // try to parse some 'i', but the token isn't finished 6300 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); 6301 if (status != XML_STATUS_OK) { 6302 xml_failure(parser); 6303 } 6304 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 6305 
6306 // end the <x> token. 6307 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 6308 if (status != XML_STATUS_OK) { 6309 xml_failure(parser); 6310 } 6311 CharData_CheckXMLChars(&storage, XCS("d")); // not yet. 6312 6313 // now change the heuristic setting and add *no* data 6314 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); 6315 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. 6316 status = XML_Parse(parser, "", 0, XML_FALSE); 6317 if (status != XML_STATUS_OK) { 6318 xml_failure(parser); 6319 } 6320 CharData_CheckXMLChars(&storage, XCS("dx")); 6321 6322 XML_ParserFree(parser); 6323 } 6324 END_TEST 6325 6326 START_TEST(test_set_bad_reparse_option) { 6327 XML_Parser parser = XML_ParserCreate(NULL); 6328 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); 6329 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); 6330 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); 6331 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); 6332 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); 6333 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); 6334 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); 6335 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); 6336 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); 6337 XML_ParserFree(parser); 6338 } 6339 END_TEST 6340 6341 static size_t g_totalAlloc = 0; 6342 static size_t g_biggestAlloc = 0; 6343 6344 static void * 6345 counting_realloc(void *ptr, size_t size) { 6346 g_totalAlloc += size; 6347 if (size > g_biggestAlloc) { 6348 g_biggestAlloc = size; 6349 } 6350 return realloc(ptr, size); 6351 } 6352 6353 static void * 6354 counting_malloc(size_t size) { 6355 return counting_realloc(NULL, size); 6356 } 6357 6358 START_TEST(test_bypass_heuristic_when_close_to_bufsize) { 6359 if (g_chunkSize != 0) { 6360 // this test does not 
use SINGLE_BYTES, because it depends on very precise 6361 // buffer fills. 6362 return; 6363 } 6364 if (! g_reparseDeferralEnabledDefault) { 6365 return; // this test is irrelevant when the deferral heuristic is disabled. 6366 } 6367 6368 const int document_length = 65536; 6369 char *const document = malloc(document_length); 6370 assert_true(document != NULL); 6371 6372 const XML_Memory_Handling_Suite memfuncs = { 6373 counting_malloc, 6374 counting_realloc, 6375 free, 6376 }; 6377 6378 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; 6379 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; 6380 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; 6381 6382 for (const int *leading = leading_list; *leading >= 0; leading++) { 6383 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { 6384 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { 6385 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken, 6386 *fillsize); 6387 // start by checking that the test looks reasonably valid 6388 assert_true(*leading + *bigtoken <= document_length); 6389 6390 // put 'x' everywhere; some will be overwritten by elements. 6391 memset(document, 'x', document_length); 6392 // maybe add an initial tag 6393 if (*leading) { 6394 assert_true(*leading >= 3); // or the test case is invalid 6395 memcpy(document, "<a>", 3); 6396 } 6397 // add the large token 6398 document[*leading + 0] = '<'; 6399 document[*leading + 1] = 'b'; 6400 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token 6401 document[*leading + *bigtoken - 1] = '>'; 6402 6403 // 1 for 'b', plus 1 or 0 depending on the presence of 'a' 6404 const int expected_elem_total = 1 + (*leading ? 
1 : 0); 6405 6406 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); 6407 assert_true(parser != NULL); 6408 6409 CharData storage; 6410 CharData_Init(&storage); 6411 XML_SetUserData(parser, &storage); 6412 XML_SetStartElementHandler(parser, start_element_event_handler); 6413 6414 g_biggestAlloc = 0; 6415 g_totalAlloc = 0; 6416 int offset = 0; 6417 // fill data until the big token is covered (but not necessarily parsed) 6418 while (offset < *leading + *bigtoken) { 6419 assert_true(offset + *fillsize <= document_length); 6420 const enum XML_Status status 6421 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6422 if (status != XML_STATUS_OK) { 6423 xml_failure(parser); 6424 } 6425 offset += *fillsize; 6426 } 6427 // Now, check that we've had a buffer allocation that could fit the 6428 // context bytes and our big token. In order to detect a special case, 6429 // we need to know how many bytes of our big token were included in the 6430 // first push that contained _any_ bytes of the big token: 6431 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); 6432 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { 6433 // Special case: we aren't saving any context, and the whole big token 6434 // was covered by a single fill, so Expat may have parsed directly 6435 // from our input pointer, without allocating an internal buffer. 
6436 } else if (*leading < XML_CONTEXT_BYTES) { 6437 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); 6438 } else { 6439 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); 6440 } 6441 // fill data until the big token is actually parsed 6442 while (storage.count < expected_elem_total) { 6443 const size_t alloc_before = g_totalAlloc; 6444 assert_true(offset + *fillsize <= document_length); 6445 const enum XML_Status status 6446 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6447 if (status != XML_STATUS_OK) { 6448 xml_failure(parser); 6449 } 6450 offset += *fillsize; 6451 // since all the bytes of the big token are already in the buffer, 6452 // the bufsize ceiling should make us finish its parsing without any 6453 // further buffer allocations. We assume that there will be no other 6454 // large allocations in this test. 6455 assert_true(g_totalAlloc - alloc_before < 4096); 6456 } 6457 // test-the-test: was our alloc even called? 6458 assert_true(g_totalAlloc > 0); 6459 // test-the-test: there shouldn't be any extra start elements 6460 assert_true(storage.count == expected_elem_total); 6461 6462 XML_ParserFree(parser); 6463 } 6464 } 6465 } 6466 free(document); 6467 } 6468 END_TEST 6469 6470 START_TEST(test_varying_buffer_fills) { 6471 const int KiB = 1024; 6472 const int MiB = 1024 * KiB; 6473 const int document_length = 16 * MiB; 6474 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB 6475 6476 if (g_chunkSize != 0) { 6477 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES(). 6478 } 6479 6480 char *const document = malloc(document_length); 6481 assert_true(document != NULL); 6482 memset(document, 'x', document_length); 6483 document[0] = '<'; 6484 document[1] = 't'; 6485 memset(&document[2], ' ', big - 2); // a very spacy token 6486 document[big - 1] = '>'; 6487 6488 // Each testcase is a list of buffer fill sizes, terminated by a value < 0. 
6489 // When reparse deferral is enabled, the final (negated) value is the expected 6490 // maximum number of bytes scanned in parse attempts. 6491 const int testcases[][30] = { 6492 {8 * MiB, -8 * MiB}, 6493 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total 6494 // zero-size fills shouldn't trigger the bypass 6495 {4 * MiB, 0, 4 * MiB, -12 * MiB}, 6496 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, 6497 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, 6498 // try to hit the buffer ceiling only once (at the end) 6499 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, 6500 // try to hit the same buffer ceiling multiple times 6501 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, 6502 6503 // try to hit every ceiling, by always landing 1K shy of the buffer size 6504 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, 6505 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, 6506 6507 // try to avoid every ceiling, by always landing 1B past the buffer size 6508 // the normal 2x heuristic threshold still forces parse attempts. 6509 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 6510 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 6511 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 6512 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 6513 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 6514 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 6515 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 6516 -(10 * MiB + 682 * KiB + 7)}, 6517 // try to avoid every ceiling again, except on our last fill. 
6518 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 6519 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 6520 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 6521 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 6522 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 6523 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 6524 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 6525 -(10 * MiB + 682 * KiB + 6)}, 6526 6527 // try to hit ceilings on the way multiple times 6528 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer 6529 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer 6530 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer 6531 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer 6532 // we'll make a parse attempt at every parse call 6533 -(45 * MiB + 12)}, 6534 }; 6535 const int testcount = sizeof(testcases) / sizeof(testcases[0]); 6536 for (int test_i = 0; test_i < testcount; test_i++) { 6537 const int *fillsize = testcases[test_i]; 6538 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1], 6539 fillsize[2], fillsize[3]); 6540 XML_Parser parser = XML_ParserCreate(NULL); 6541 assert_true(parser != NULL); 6542 6543 CharData storage; 6544 CharData_Init(&storage); 6545 XML_SetUserData(parser, &storage); 6546 XML_SetStartElementHandler(parser, start_element_event_handler); 6547 6548 g_bytesScanned = 0; 6549 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) 6550 int offset = 0; 6551 while (*fillsize >= 0) { 6552 assert_true(offset + *fillsize <= document_length); // or test is invalid 6553 const enum XML_Status status 6554 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6555 if (status != XML_STATUS_OK) { 6556 xml_failure(parser); 6557 } 6558 offset += *fillsize; 6559 fillsize++; 6560 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid 
overflow 6561 worstcase_bytes += offset; // we might've tried to parse all pending bytes 6562 } 6563 assert_true(storage.count == 1); // the big token should've been parsed 6564 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? 6565 if (g_reparseDeferralEnabledDefault) { 6566 // heuristic is enabled; some XML_Parse calls may have deferred reparsing 6567 const unsigned max_bytes_scanned = -*fillsize; 6568 if (g_bytesScanned > max_bytes_scanned) { 6569 fprintf(stderr, 6570 "bytes scanned in parse attempts: actual=%u limit=%u \n", 6571 g_bytesScanned, max_bytes_scanned); 6572 fail("too many bytes scanned in parse attempts"); 6573 } 6574 } 6575 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); 6576 6577 XML_ParserFree(parser); 6578 } 6579 free(document); 6580 } 6581 END_TEST 6582 6583 START_TEST(test_empty_ext_param_entity_in_value) { 6584 const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>"; 6585 ExtOption options[] = { 6586 {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">" 6587 "<!ENTITY ge \"%pe;\">"}, 6588 {XCS("empty"), ""}, 6589 {NULL, NULL}, 6590 }; 6591 6592 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 6593 XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); 6594 XML_SetUserData(g_parser, options); 6595 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 6596 == XML_STATUS_ERROR) 6597 xml_failure(g_parser); 6598 } 6599 END_TEST 6600 6601 void 6602 make_basic_test_case(Suite *s) { 6603 TCase *tc_basic = tcase_create("basic tests"); 6604 6605 suite_add_tcase(s, tc_basic); 6606 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 6607 6608 tcase_add_test(tc_basic, test_nul_byte); 6609 tcase_add_test(tc_basic, test_u0000_char); 6610 tcase_add_test(tc_basic, test_siphash_self); 6611 tcase_add_test(tc_basic, test_siphash_spec); 6612 tcase_add_test(tc_basic, test_bom_utf8); 6613 tcase_add_test(tc_basic, test_bom_utf16_be); 6614 tcase_add_test(tc_basic, 
test_bom_utf16_le); 6615 tcase_add_test(tc_basic, test_nobom_utf16_le); 6616 tcase_add_test(tc_basic, test_hash_collision); 6617 tcase_add_test(tc_basic, test_hash_salt_setter); 6618 tcase_add_test(tc_basic, test_illegal_utf8); 6619 tcase_add_test(tc_basic, test_utf8_auto_align); 6620 tcase_add_test(tc_basic, test_utf16); 6621 tcase_add_test(tc_basic, test_utf16_le_epilog_newline); 6622 tcase_add_test(tc_basic, test_not_utf16); 6623 tcase_add_test(tc_basic, test_bad_encoding); 6624 tcase_add_test(tc_basic, test_latin1_umlauts); 6625 tcase_add_test(tc_basic, test_long_utf8_character); 6626 tcase_add_test(tc_basic, test_long_latin1_attribute); 6627 tcase_add_test(tc_basic, test_long_ascii_attribute); 6628 /* Regression test for SF bug #491986. */ 6629 tcase_add_test(tc_basic, test_danish_latin1); 6630 /* Regression test for SF bug #514281. */ 6631 tcase_add_test(tc_basic, test_french_charref_hexidecimal); 6632 tcase_add_test(tc_basic, test_french_charref_decimal); 6633 tcase_add_test(tc_basic, test_french_latin1); 6634 tcase_add_test(tc_basic, test_french_utf8); 6635 tcase_add_test(tc_basic, test_utf8_false_rejection); 6636 tcase_add_test(tc_basic, test_line_number_after_parse); 6637 tcase_add_test(tc_basic, test_column_number_after_parse); 6638 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); 6639 tcase_add_test(tc_basic, test_line_number_after_error); 6640 tcase_add_test(tc_basic, test_column_number_after_error); 6641 tcase_add_test(tc_basic, test_really_long_lines); 6642 tcase_add_test(tc_basic, test_really_long_encoded_lines); 6643 tcase_add_test(tc_basic, test_end_element_events); 6644 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized); 6645 tcase_add_test(tc_basic, test_attr_whitespace_normalization); 6646 tcase_add_test(tc_basic, test_xmldecl_misplaced); 6647 tcase_add_test(tc_basic, test_xmldecl_invalid); 6648 tcase_add_test(tc_basic, test_xmldecl_missing_attr); 6649 tcase_add_test(tc_basic, test_xmldecl_missing_value); 6650 
tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity); 6651 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); 6652 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); 6653 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); 6654 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); 6655 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding); 6656 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2); 6657 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset); 6658 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset); 6659 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); 6660 tcase_add_test(tc_basic, 6661 test_wfc_undeclared_entity_with_external_subset_standalone); 6662 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); 6663 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); 6664 tcase_add_test(tc_basic, test_not_standalone_handler_reject); 6665 tcase_add_test(tc_basic, test_not_standalone_handler_accept); 6666 tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one); 6667 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs); 6668 tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs); 6669 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse); 6670 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling); 6671 tcase_add_test(tc_basic, test_dtd_attr_handling); 6672 tcase_add_test(tc_basic, test_empty_ns_without_namespaces); 6673 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); 6674 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); 6675 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); 6676 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); 6677 tcase_add_test(tc_basic, 
test_good_cdata_ascii); 6678 tcase_add_test(tc_basic, test_good_cdata_utf16); 6679 tcase_add_test(tc_basic, test_good_cdata_utf16_le); 6680 tcase_add_test(tc_basic, test_long_cdata_utf16); 6681 tcase_add_test(tc_basic, test_multichar_cdata_utf16); 6682 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); 6683 tcase_add_test(tc_basic, test_bad_cdata); 6684 tcase_add_test(tc_basic, test_bad_cdata_utf16); 6685 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); 6686 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); 6687 tcase_add_test(tc_basic, test_memory_allocation); 6688 tcase_add_test__if_xml_ge(tc_basic, test_default_current); 6689 tcase_add_test(tc_basic, test_dtd_elements); 6690 tcase_add_test(tc_basic, test_dtd_elements_nesting); 6691 tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); 6692 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); 6693 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); 6694 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype); 6695 tcase_add_test__ifdef_xml_dtd(tc_basic, 6696 test_foreign_dtd_without_external_subset); 6697 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd); 6698 tcase_add_test(tc_basic, test_set_base); 6699 tcase_add_test(tc_basic, test_attributes); 6700 tcase_add_test(tc_basic, test_duplicate_cdata_attribute); 6701 tcase_add_test(tc_basic, test_duplicate_id_attribute_1); 6702 tcase_add_test(tc_basic, test_duplicate_id_attribute_2); 6703 tcase_add_test(tc_basic, test_duplicate_cdata_attribute_multiple_attlistdecl); 6704 tcase_add_test(tc_basic, 6705 test_duplicate_cdata_attribute_multiple_attlistdecl_2); 6706 tcase_add_test(tc_basic, 6707 test_duplicate_cdata_attribute_multiple_attlistdecl_3); 6708 tcase_add_test(tc_basic, test_duplicate_id_attribute_multiple_attlistdecl); 6709 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity); 6710 tcase_add_test(tc_basic, test_resume_invalid_parse); 6711 
tcase_add_test(tc_basic, test_resume_resuspended); 6712 tcase_add_test(tc_basic, test_cdata_default); 6713 tcase_add_test(tc_basic, test_subordinate_reset); 6714 tcase_add_test(tc_basic, test_subordinate_suspend); 6715 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend); 6716 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort); 6717 tcase_add_test__ifdef_xml_dtd(tc_basic, 6718 test_ext_entity_invalid_suspended_parse); 6719 tcase_add_test(tc_basic, test_explicit_encoding); 6720 tcase_add_test(tc_basic, test_trailing_cr); 6721 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr); 6722 tcase_add_test(tc_basic, test_trailing_rsqb); 6723 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb); 6724 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata); 6725 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); 6726 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); 6727 tcase_add_test(tc_basic, test_empty_parse); 6728 tcase_add_test(tc_basic, test_negative_len_parse); 6729 tcase_add_test(tc_basic, test_negative_len_parse_buffer); 6730 tcase_add_test(tc_basic, test_get_buffer_1); 6731 tcase_add_test(tc_basic, test_get_buffer_2); 6732 #if XML_CONTEXT_BYTES > 0 6733 tcase_add_test(tc_basic, test_get_buffer_3_overflow); 6734 #endif 6735 tcase_add_test(tc_basic, test_buffer_can_grow_to_max); 6736 tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); 6737 tcase_add_test(tc_basic, test_byte_info_at_end); 6738 tcase_add_test(tc_basic, test_byte_info_at_error); 6739 tcase_add_test(tc_basic, test_byte_info_at_cdata); 6740 tcase_add_test(tc_basic, test_predefined_entities); 6741 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd); 6742 tcase_add_test(tc_basic, test_not_predefined_entities); 6743 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section); 6744 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); 6745 
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); 6746 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); 6747 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); 6748 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); 6749 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); 6750 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); 6751 tcase_add_test(tc_basic, test_bad_public_doctype); 6752 tcase_add_test(tc_basic, test_attribute_enum_value); 6753 tcase_add_test(tc_basic, test_predefined_entity_redefinition); 6754 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing); 6755 tcase_add_test(tc_basic, test_public_notation_no_sysid); 6756 tcase_add_test(tc_basic, test_nested_groups); 6757 tcase_add_test(tc_basic, test_group_choice); 6758 tcase_add_test(tc_basic, test_standalone_parameter_entity); 6759 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); 6760 tcase_add_test__ifdef_xml_dtd(tc_basic, 6761 test_recursive_external_parameter_entity); 6762 tcase_add_test__ifdef_xml_dtd(tc_basic, 6763 test_recursive_external_parameter_entity_2); 6764 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); 6765 tcase_add_test(tc_basic, test_suspend_xdecl); 6766 tcase_add_test(tc_basic, test_abort_epilog); 6767 tcase_add_test(tc_basic, test_abort_epilog_2); 6768 tcase_add_test(tc_basic, test_suspend_epilog); 6769 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag); 6770 tcase_add_test(tc_basic, test_unfinished_epilog); 6771 tcase_add_test(tc_basic, test_partial_char_in_epilog); 6772 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity); 6773 tcase_add_test__ifdef_xml_dtd(tc_basic, 6774 test_suspend_resume_internal_entity_issue_629); 6775 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error); 6776 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity); 
6777 tcase_add_test(tc_basic, test_restart_on_error); 6778 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); 6779 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); 6780 tcase_add_test(tc_basic, test_trailing_cr_in_att_value); 6781 tcase_add_test(tc_basic, test_standalone_internal_entity); 6782 tcase_add_test(tc_basic, test_skipped_external_entity); 6783 tcase_add_test__ifdef_xml_dtd( 6784 tc_basic, test_scaff_index_shared_across_external_entity_parser); 6785 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); 6786 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); 6787 tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr); 6788 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity); 6789 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2); 6790 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3); 6791 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4); 6792 tcase_add_test(tc_basic, test_pi_handled_in_default); 6793 tcase_add_test(tc_basic, test_comment_handled_in_default); 6794 tcase_add_test(tc_basic, test_pi_yml); 6795 tcase_add_test(tc_basic, test_pi_xnl); 6796 tcase_add_test(tc_basic, test_pi_xmm); 6797 tcase_add_test(tc_basic, test_utf16_pi); 6798 tcase_add_test(tc_basic, test_utf16_be_pi); 6799 tcase_add_test(tc_basic, test_utf16_be_comment); 6800 tcase_add_test(tc_basic, test_utf16_le_comment); 6801 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); 6802 tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); 6803 tcase_add_test(tc_basic, test_unknown_encoding_success); 6804 tcase_add_test(tc_basic, test_unknown_encoding_bad_name); 6805 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); 6806 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); 6807 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); 6808 tcase_add_test(tc_basic, test_invalid_unknown_encoding); 6809 
tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); 6810 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); 6811 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); 6812 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); 6813 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); 6814 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); 6815 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); 6816 tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary); 6817 tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary); 6818 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom); 6819 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); 6820 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); 6821 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2); 6822 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be); 6823 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le); 6824 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown); 6825 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom); 6826 tcase_add_test(tc_basic, test_utf8_in_cdata_section); 6827 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); 6828 tcase_add_test(tc_basic, test_utf8_in_start_tags); 6829 tcase_add_test(tc_basic, test_trailing_spaces_in_elements); 6830 tcase_add_test(tc_basic, test_utf16_attribute); 6831 tcase_add_test(tc_basic, test_utf16_second_attr); 6832 tcase_add_test(tc_basic, test_attr_after_solidus); 6833 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe); 6834 tcase_add_test(tc_basic, test_bad_attr_desc_keyword); 6835 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); 6836 tcase_add_test(tc_basic, test_bad_doctype); 6837 tcase_add_test(tc_basic, test_bad_doctype_utf8); 6838 tcase_add_test(tc_basic, test_bad_doctype_utf16); 6839 tcase_add_test(tc_basic, 
test_bad_doctype_plus); 6840 tcase_add_test(tc_basic, test_bad_doctype_star); 6841 tcase_add_test(tc_basic, test_bad_doctype_query); 6842 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore); 6843 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); 6844 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); 6845 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be); 6846 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le); 6847 tcase_add_test(tc_basic, test_short_doctype); 6848 tcase_add_test(tc_basic, test_short_doctype_2); 6849 tcase_add_test(tc_basic, test_short_doctype_3); 6850 tcase_add_test(tc_basic, test_long_doctype); 6851 tcase_add_test(tc_basic, test_bad_entity); 6852 tcase_add_test(tc_basic, test_bad_entity_2); 6853 tcase_add_test(tc_basic, test_bad_entity_3); 6854 tcase_add_test(tc_basic, test_bad_entity_4); 6855 tcase_add_test(tc_basic, test_bad_notation); 6856 tcase_add_test(tc_basic, test_default_doctype_handler); 6857 tcase_add_test(tc_basic, test_empty_element_abort); 6858 tcase_add_test__ifdef_xml_dtd(tc_basic, 6859 test_pool_integrity_with_unfinished_attr); 6860 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value); 6861 tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements); 6862 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity); 6863 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity); 6864 tcase_add_test__if_xml_ge(tc_basic, 6865 test_deep_nested_entity_delayed_interpretation); 6866 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); 6867 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2); 6868 tcase_add_test(tc_basic, test_big_tokens_scale_linearly); 6869 tcase_add_test(tc_basic, test_set_reparse_deferral); 6870 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); 6871 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); 6872 tcase_add_test(tc_basic, 
test_set_reparse_deferral_on_the_fly); 6873 tcase_add_test(tc_basic, test_set_bad_reparse_option); 6874 tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); 6875 tcase_add_test(tc_basic, test_varying_buffer_fills); 6876 } 6877