1 /* Tests in the "basic" test case for the Expat test suite 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> 14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 15 Copyright (c) 2017 Joe Orton <jorton@redhat.com> 16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com> 20 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 22 Copyright (c) 2024-2026 Berkay Eren Ürün <berkay.ueruen@siemens.com> 23 Copyright (c) 2026 Francesco Bertolaccini 24 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez@gmail.com> 25 Licensed under the MIT license: 26 27 Permission is hereby granted, free of charge, to any person obtaining 28 a copy of this software and associated documentation files (the 29 "Software"), to deal in the Software without restriction, including 30 without limitation the rights to use, copy, modify, merge, publish, 31 distribute, sublicense, and/or sell copies of the Software, and to permit 32 persons to whom the Software is furnished to do so, subject to the 33 following conditions: 34 35 The above copyright notice and this permission notice shall be included 36 in all copies or substantial portions of the Software. 37 38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 39 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 40 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 41 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 42 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 43 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 44 USE OR OTHER DEALINGS IN THE SOFTWARE. 45 */ 46 47 #if defined(NDEBUG) 48 # undef NDEBUG /* because test suite relies on assert(...) at the moment */ 49 #endif 50 51 #include <assert.h> 52 53 #include <stdio.h> 54 #include <string.h> 55 #include <time.h> 56 57 #if ! defined(__cplusplus) 58 # include <stdbool.h> 59 #endif 60 61 #include "expat_config.h" 62 63 #include "expat.h" 64 #include "internal.h" 65 #include "minicheck.h" 66 #include "structdata.h" 67 #include "common.h" 68 #include "dummy.h" 69 #include "handlers.h" 70 #include "siphash.h" 71 #include "basic_tests.h" 72 73 static void 74 basic_setup(void) { 75 g_parser = XML_ParserCreate(NULL); 76 if (g_parser == NULL) 77 fail("Parser not created."); 78 } 79 80 /* 81 * Character & encoding tests. 82 */ 83 84 START_TEST(test_nul_byte) { 85 char text[] = "<doc>\0</doc>"; 86 87 /* test that a NUL byte (in US-ASCII data) is an error */ 88 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 89 == XML_STATUS_OK) 90 fail("Parser did not report error on NUL-byte."); 91 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 92 xml_failure(g_parser); 93 } 94 END_TEST 95 96 START_TEST(test_u0000_char) { 97 /* test that a NUL byte (in US-ASCII data) is an error */ 98 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF, 99 "Parser did not report error on NUL-byte."); 100 } 101 END_TEST 102 103 START_TEST(test_siphash_self) { 104 if (! sip24_valid()) 105 fail("SipHash self-test failed"); 106 } 107 END_TEST 108 109 START_TEST(test_siphash_spec) { 110 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */ 111 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 112 "\x0a\x0b\x0c\x0d\x0e"; 113 const size_t len = sizeof(message) - 1; 114 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U); 115 struct siphash state; 116 struct sipkey key; 117 118 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 119 "\x0a\x0b\x0c\x0d\x0e\x0f"); 120 sip24_init(&state, &key); 121 122 /* Cover spread across calls */ 123 sip24_update(&state, message, 4); 124 sip24_update(&state, message + 4, len - 4); 125 126 /* Cover null length */ 127 sip24_update(&state, message, 0); 128 129 if (sip24_final(&state) != expected) 130 fail("sip24_final failed spec test\n"); 131 132 /* Cover wrapper */ 133 if (siphash24(message, len, &key) != expected) 134 fail("siphash24 failed spec test\n"); 135 } 136 END_TEST 137 138 START_TEST(test_bom_utf8) { 139 /* This test is really just making sure we don't core on a UTF-8 BOM. */ 140 const char *text = "\357\273\277<e/>"; 141 142 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 143 == XML_STATUS_ERROR) 144 xml_failure(g_parser); 145 } 146 END_TEST 147 148 START_TEST(test_bom_utf16_be) { 149 char text[] = "\376\377\0<\0e\0/\0>"; 150 151 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 152 == XML_STATUS_ERROR) 153 xml_failure(g_parser); 154 } 155 END_TEST 156 157 START_TEST(test_bom_utf16_le) { 158 char text[] = "\377\376<\0e\0/\0>\0"; 159 160 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 161 == XML_STATUS_ERROR) 162 xml_failure(g_parser); 163 } 164 END_TEST 165 166 START_TEST(test_nobom_utf16_le) { 167 char text[] = " \0<\0e\0/\0>\0"; 168 169 if (g_chunkSize == 1) { 170 // TODO: with just the first byte, we can't tell the difference between 171 // UTF-16-LE and UTF-8. Avoid the failure for now. 172 return; 173 } 174 175 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 176 == XML_STATUS_ERROR) 177 xml_failure(g_parser); 178 } 179 END_TEST 180 181 START_TEST(test_hash_collision) { 182 /* For full coverage of the lookup routine, we need to ensure a 183 * hash collision even though we can only tell that we have one 184 * through breakpoint debugging or coverage statistics. The 185 * following will cause a hash collision on machines with a 64-bit 186 * long type; others will have to experiment. The full coverage 187 * tests invoked from qa.sh usually provide a hash collision, but 188 * not always. This is an attempt to provide insurance. 189 */ 190 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U) 191 const char *text 192 = "<doc>\n" 193 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n" 194 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n" 195 "<b5></b5><b6></b6><b7></b7><b8></b8>\n" 196 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n" 197 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n" 198 "<d8>This triggers the table growth and collides with b2</d8>\n" 199 "</doc>\n"; 200 201 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT); 202 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 203 == XML_STATUS_ERROR) 204 xml_failure(g_parser); 205 } 206 END_TEST 207 #undef COLLIDING_HASH_SALT 208 209 START_TEST(test_hash_salt_setter) { 210 const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7', 211 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; 212 XML_Parser parser = XML_ParserCreate(NULL); 213 214 // NULL parser should be rejected 215 assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE); 216 217 // NULL entropy should be rejected 218 assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE); 219 220 // Setting should be allowed more than once 221 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE); 222 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE); 223 224 // But not after parsing has started 225 assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */) 226 == XML_STATUS_OK); 227 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE); 228 229 XML_ParserFree(parser); 230 } 231 END_TEST 232 233 /* Regression test for SF bug #491986. */ 234 START_TEST(test_danish_latin1) { 235 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 236 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>"; 237 #ifdef XML_UNICODE 238 const XML_Char *expected 239 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5"); 240 #else 241 const XML_Char *expected 242 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); 243 #endif 244 run_character_check(text, expected); 245 } 246 END_TEST 247 248 /* Regression test for SF bug #514281. */ 249 START_TEST(test_french_charref_hexidecimal) { 250 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 251 "<doc>éèàçêÈ</doc>"; 252 #ifdef XML_UNICODE 253 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 254 #else 255 const XML_Char *expected 256 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 257 #endif 258 run_character_check(text, expected); 259 } 260 END_TEST 261 262 START_TEST(test_french_charref_decimal) { 263 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 264 "<doc>éèàçêÈ</doc>"; 265 #ifdef XML_UNICODE 266 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 267 #else 268 const XML_Char *expected 269 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 270 #endif 271 run_character_check(text, expected); 272 } 273 END_TEST 274 275 START_TEST(test_french_latin1) { 276 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 277 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>"; 278 #ifdef XML_UNICODE 279 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 280 #else 281 const XML_Char *expected 282 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 283 #endif 284 run_character_check(text, expected); 285 } 286 END_TEST 287 288 START_TEST(test_french_utf8) { 289 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 290 "<doc>\xC3\xA9</doc>"; 291 #ifdef XML_UNICODE 292 const XML_Char *expected = XCS("\x00e9"); 293 #else 294 const XML_Char *expected = XCS("\xC3\xA9"); 295 #endif 296 run_character_check(text, expected); 297 } 298 END_TEST 299 300 /* Regression test for SF bug #600479. 301 XXX There should be a test that exercises all legal XML Unicode 302 characters as PCDATA and attribute value content, and XML Name 303 characters as part of element and attribute names. 304 */ 305 START_TEST(test_utf8_false_rejection) { 306 const char *text = "<doc>\xEF\xBA\xBF</doc>"; 307 #ifdef XML_UNICODE 308 const XML_Char *expected = XCS("\xfebf"); 309 #else 310 const XML_Char *expected = XCS("\xEF\xBA\xBF"); 311 #endif 312 run_character_check(text, expected); 313 } 314 END_TEST 315 316 /* Regression test for SF bug #477667. 317 This test assures that any 8-bit character followed by a 7-bit 318 character will not be mistakenly interpreted as a valid UTF-8 319 sequence. 320 */ 321 START_TEST(test_illegal_utf8) { 322 char text[100]; 323 int i; 324 325 for (i = 128; i <= 255; ++i) { 326 snprintf(text, sizeof(text), "<e>%ccd</e>", i); 327 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 328 == XML_STATUS_OK) { 329 snprintf(text, sizeof(text), 330 "expected token error for '%c' (ordinal %d) in UTF-8 text", i, 331 i); 332 fail(text); 333 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 334 xml_failure(g_parser); 335 /* Reset the parser since we use the same parser repeatedly. */ 336 XML_ParserReset(g_parser, NULL); 337 } 338 } 339 END_TEST 340 341 /* Examples, not masks: */ 342 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ 343 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ 344 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */ 345 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ 346 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */ 347 348 START_TEST(test_utf8_auto_align) { 349 struct TestCase { 350 ptrdiff_t expectedMovementInChars; 351 const char *input; 352 }; 353 354 struct TestCase cases[] = { 355 {00, ""}, 356 357 {00, UTF8_LEAD_1}, 358 359 {-1, UTF8_LEAD_2}, 360 {00, UTF8_LEAD_2 UTF8_FOLLOW}, 361 362 {-1, UTF8_LEAD_3}, 363 {-2, UTF8_LEAD_3 UTF8_FOLLOW}, 364 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, 365 366 {-1, UTF8_LEAD_4}, 367 {-2, UTF8_LEAD_4 UTF8_FOLLOW}, 368 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, 369 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, 370 }; 371 372 size_t i = 0; 373 bool success = true; 374 for (; i < sizeof(cases) / sizeof(*cases); i++) { 375 const char *fromLim = cases[i].input + strlen(cases[i].input); 376 const char *const fromLimInitially = fromLim; 377 ptrdiff_t actualMovementInChars; 378 379 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim); 380 381 actualMovementInChars = (fromLim - fromLimInitially); 382 if (actualMovementInChars != cases[i].expectedMovementInChars) { 383 size_t j = 0; 384 success = false; 385 printf("[-] UTF-8 case %2u: Expected movement by %2d chars" 386 ", actually moved by %2d chars: \"", 387 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars, 388 (int)actualMovementInChars); 389 for (; j < strlen(cases[i].input); j++) { 390 printf("\\x%02x", (unsigned char)cases[i].input[j]); 391 } 392 printf("\"\n"); 393 } 394 } 395 396 if (! success) { 397 fail("UTF-8 auto-alignment is not bullet-proof\n"); 398 } 399 } 400 END_TEST 401 402 START_TEST(test_utf16) { 403 /* <?xml version="1.0" encoding="UTF-16"?> 404 * <doc a='123'>some {A} text</doc> 405 * 406 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A 407 */ 408 char text[] 409 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" 410 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" 411 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" 412 "\000'\000?\000>\000\n" 413 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" 414 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" 415 "<\000/\000d\000o\000c\000>"; 416 #ifdef XML_UNICODE 417 const XML_Char *expected = XCS("some \xff21 text"); 418 #else 419 const XML_Char *expected = XCS("some \357\274\241 text"); 420 #endif 421 CharData storage; 422 423 CharData_Init(&storage); 424 XML_SetUserData(g_parser, &storage); 425 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 426 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 427 == XML_STATUS_ERROR) 428 xml_failure(g_parser); 429 CharData_CheckXMLChars(&storage, expected); 430 } 431 END_TEST 432 433 START_TEST(test_utf16_le_epilog_newline) { 434 unsigned int first_chunk_bytes = 17; 435 char text[] = "\xFF\xFE" /* BOM */ 436 "<\000e\000/\000>\000" /* document element */ 437 "\r\000\n\000\r\000\n\000"; /* epilog */ 438 439 if (first_chunk_bytes >= sizeof(text) - 1) 440 fail("bad value of first_chunk_bytes"); 441 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE) 442 == XML_STATUS_ERROR) 443 xml_failure(g_parser); 444 else { 445 enum XML_Status rc; 446 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes, 447 (int)(sizeof(text) - first_chunk_bytes - 1), 448 XML_TRUE); 449 if (rc == XML_STATUS_ERROR) 450 xml_failure(g_parser); 451 } 452 } 453 END_TEST 454 455 /* Test that an outright lie in the encoding is faulted */ 456 START_TEST(test_not_utf16) { 457 const char *text = "<?xml version='1.0' encoding='utf-16'?>" 458 "<doc>Hi</doc>"; 459 460 /* Use a handler to provoke the appropriate code paths */ 461 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler); 462 expect_failure(text, XML_ERROR_INCORRECT_ENCODING, 463 "UTF-16 declared in UTF-8 not faulted"); 464 } 465 END_TEST 466 467 /* Test that an unknown encoding is rejected */ 468 START_TEST(test_bad_encoding) { 469 const char *text = "<doc>Hi</doc>"; 470 471 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding"))) 472 fail("XML_SetEncoding failed"); 473 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 474 "Unknown encoding not faulted"); 475 } 476 END_TEST 477 478 /* Regression test for SF bug #481609, #774028. */ 479 START_TEST(test_latin1_umlauts) { 480 const char *text 481 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 482 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n" 483 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>"; 484 #ifdef XML_UNICODE 485 /* Expected results in UTF-16 */ 486 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ") 487 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >"); 488 #else 489 /* Expected results in UTF-8 */ 490 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") 491 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >"); 492 #endif 493 494 run_character_check(text, expected); 495 XML_ParserReset(g_parser, NULL); 496 run_attribute_check(text, expected); 497 /* Repeat with a default handler */ 498 XML_ParserReset(g_parser, NULL); 499 XML_SetDefaultHandler(g_parser, dummy_default_handler); 500 run_character_check(text, expected); 501 XML_ParserReset(g_parser, NULL); 502 XML_SetDefaultHandler(g_parser, dummy_default_handler); 503 run_attribute_check(text, expected); 504 } 505 END_TEST 506 507 /* Test that an element name with a 4-byte UTF-8 character is rejected */ 508 START_TEST(test_long_utf8_character) { 509 const char *text 510 = "<?xml version='1.0' encoding='utf-8'?>\n" 511 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ 512 "<do\xf0\x90\x80\x80/>"; 513 expect_failure(text, XML_ERROR_INVALID_TOKEN, 514 "4-byte UTF-8 character in element name not faulted"); 515 } 516 END_TEST 517 518 /* Test that a long latin-1 attribute (too long to convert in one go) 519 * is correctly converted 520 */ 521 START_TEST(test_long_latin1_attribute) { 522 const char *text 523 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 524 "<doc att='" 525 /* 64 characters per line */ 526 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 527 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 528 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 529 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 530 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 531 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 532 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 533 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 534 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 535 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 536 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 537 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 538 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 539 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 540 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 541 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" 542 /* Last character splits across a buffer boundary */ 543 "\xe4'>\n</doc>"; 544 545 const XML_Char *expected = 546 /* 64 characters per line */ 547 /* clang-format off */ 548 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 549 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 550 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 551 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 552 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 553 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 554 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 555 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 556 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 557 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 558 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 559 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 560 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 561 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 562 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 563 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO") 564 /* clang-format on */ 565 #ifdef XML_UNICODE 566 XCS("\x00e4"); 567 #else 568 XCS("\xc3\xa4"); 569 #endif 570 571 run_attribute_check(text, expected); 572 } 573 END_TEST 574 575 /* Test that a long ASCII attribute (too long to convert in one go) 576 * is correctly converted 577 */ 578 START_TEST(test_long_ascii_attribute) { 579 const char *text 580 = "<?xml version='1.0' encoding='us-ascii'?>\n" 581 "<doc att='" 582 /* 64 characters per line */ 583 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 584 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 585 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 586 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 587 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 588 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 589 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 590 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 591 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 592 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 593 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 594 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 595 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 596 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 597 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 598 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 599 "01234'>\n</doc>"; 600 const XML_Char *expected = 601 /* 64 characters per line */ 602 /* clang-format off */ 603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 604 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 605 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 606 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 607 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 608 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 609 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 610 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 611 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 612 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 613 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 614 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 615 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 616 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 617 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 618 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 619 XCS("01234"); 620 /* clang-format on */ 621 622 run_attribute_check(text, expected); 623 } 624 END_TEST 625 626 /* Regression test #1 for SF bug #653180. */ 627 START_TEST(test_line_number_after_parse) { 628 const char *text = "<tag>\n" 629 "\n" 630 "\n</tag>"; 631 XML_Size lineno; 632 633 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 634 == XML_STATUS_ERROR) 635 xml_failure(g_parser); 636 lineno = XML_GetCurrentLineNumber(g_parser); 637 if (lineno != 4) { 638 char buffer[100]; 639 snprintf(buffer, sizeof(buffer), 640 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); 641 fail(buffer); 642 } 643 } 644 END_TEST 645 646 /* Regression test #2 for SF bug #653180. */ 647 START_TEST(test_column_number_after_parse) { 648 const char *text = "<tag></tag>"; 649 XML_Size colno; 650 651 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 652 == XML_STATUS_ERROR) 653 xml_failure(g_parser); 654 colno = XML_GetCurrentColumnNumber(g_parser); 655 if (colno != 11) { 656 char buffer[100]; 657 snprintf(buffer, sizeof(buffer), 658 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); 659 fail(buffer); 660 } 661 } 662 END_TEST 663 664 /* Regression test #3 for SF bug #653180. */ 665 START_TEST(test_line_and_column_numbers_inside_handlers) { 666 const char *text = "<a>\n" /* Unix end-of-line */ 667 " <b>\r\n" /* Windows end-of-line */ 668 " <c/>\r" /* Mac OS end-of-line */ 669 " </b>\n" 670 " <d>\n" 671 " <f/>\n" 672 " </d>\n" 673 "</a>"; 674 const StructDataEntry expected[] 675 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG}, 676 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG}, 677 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG}, 678 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG}, 679 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}}; 680 const int expected_count = sizeof(expected) / sizeof(StructDataEntry); 681 StructData storage; 682 683 StructData_Init(&storage); 684 XML_SetUserData(g_parser, &storage); 685 XML_SetStartElementHandler(g_parser, start_element_event_handler2); 686 XML_SetEndElementHandler(g_parser, end_element_event_handler2); 687 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 688 == XML_STATUS_ERROR) 689 xml_failure(g_parser); 690 691 StructData_CheckItems(&storage, expected, expected_count); 692 StructData_Dispose(&storage); 693 } 694 END_TEST 695 696 /* Regression test #4 for SF bug #653180. */ 697 START_TEST(test_line_number_after_error) { 698 const char *text = "<a>\n" 699 " <b>\n" 700 " </a>"; /* missing </b> */ 701 XML_Size lineno; 702 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 703 != XML_STATUS_ERROR) 704 fail("Expected a parse error"); 705 706 lineno = XML_GetCurrentLineNumber(g_parser); 707 if (lineno != 3) { 708 char buffer[100]; 709 snprintf(buffer, sizeof(buffer), 710 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); 711 fail(buffer); 712 } 713 } 714 END_TEST 715 716 /* Regression test #5 for SF bug #653180. */ 717 START_TEST(test_column_number_after_error) { 718 const char *text = "<a>\n" 719 " <b>\n" 720 " </a>"; /* missing </b> */ 721 XML_Size colno; 722 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 723 != XML_STATUS_ERROR) 724 fail("Expected a parse error"); 725 726 colno = XML_GetCurrentColumnNumber(g_parser); 727 if (colno != 4) { 728 char buffer[100]; 729 snprintf(buffer, sizeof(buffer), 730 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); 731 fail(buffer); 732 } 733 } 734 END_TEST 735 736 /* Regression test for SF bug #478332. */ 737 START_TEST(test_really_long_lines) { 738 /* This parses an input line longer than INIT_DATA_BUF_SIZE 739 characters long (defined to be 1024 in xmlparse.c). We take a 740 really cheesy approach to building the input buffer, because 741 this avoids writing bugs in buffer-filling code. 742 */ 743 const char *text 744 = "<e>" 745 /* 64 chars */ 746 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 747 /* until we have at least 1024 characters on the line: */ 748 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 749 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 750 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 751 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 752 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 753 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 754 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 764 "</e>"; 765 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 766 == XML_STATUS_ERROR) 767 xml_failure(g_parser); 768 } 769 END_TEST 770 771 /* Test cdata processing across a buffer boundary */ 772 START_TEST(test_really_long_encoded_lines) { 773 /* As above, except that we want to provoke an output buffer 774 * overflow with a non-trivial encoding. For this we need to pass 775 * the whole cdata in one go, not byte-by-byte. 776 */ 777 void *buffer; 778 const char *text 779 = "<?xml version='1.0' encoding='iso-8859-1'?>" 780 "<e>" 781 /* 64 chars */ 782 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 783 /* until we have at least 1024 characters on the line: */ 784 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 785 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 786 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 787 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 788 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 789 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 790 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 791 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 792 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 793 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 794 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 795 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 796 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 797 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 798 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 799 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 800 "</e>"; 801 int parse_len = (int)strlen(text); 802 803 /* Need a cdata handler to provoke the code path we want to test */ 804 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler); 805 buffer = XML_GetBuffer(g_parser, parse_len); 806 if (buffer == NULL) 807 fail("Could not allocate parse buffer"); 808 assert(buffer != NULL); 809 memcpy(buffer, text, parse_len); 810 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) 811 xml_failure(g_parser); 812 } 813 END_TEST 814 815 /* 816 * Element event tests. 817 */ 818 819 START_TEST(test_end_element_events) { 820 const char *text = "<a><b><c/></b><d><f/></d></a>"; 821 const XML_Char *expected = XCS("/c/b/f/d/a"); 822 CharData storage; 823 824 CharData_Init(&storage); 825 XML_SetUserData(g_parser, &storage); 826 XML_SetEndElementHandler(g_parser, end_element_event_handler); 827 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 828 == XML_STATUS_ERROR) 829 xml_failure(g_parser); 830 CharData_CheckXMLChars(&storage, expected); 831 } 832 END_TEST 833 834 /* 835 * Attribute tests. 836 */ 837 838 /* Helper used by the following tests; this checks any "attr" and "refs" 839 attributes to make sure whitespace has been normalized. 840 841 Return true if whitespace has been normalized in a string, using 842 the rules for attribute value normalization. The 'is_cdata' flag 843 is needed since CDATA attributes don't need to have multiple 844 whitespace characters collapsed to a single space, while other 845 attribute data types do. (Section 3.3.3 of the recommendation.) 846 */ 847 static int 848 is_whitespace_normalized(const XML_Char *s, int is_cdata) { 849 int blanks = 0; 850 int at_start = 1; 851 while (*s) { 852 if (*s == XCS(' ')) 853 ++blanks; 854 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r')) 855 return 0; 856 else { 857 if (at_start) { 858 at_start = 0; 859 if (blanks && ! is_cdata) 860 /* illegal leading blanks */ 861 return 0; 862 } else if (blanks > 1 && ! is_cdata) 863 return 0; 864 blanks = 0; 865 } 866 ++s; 867 } 868 if (blanks && ! is_cdata) 869 return 0; 870 return 1; 871 } 872 873 /* Check the attribute whitespace checker: */ 874 START_TEST(test_helper_is_whitespace_normalized) { 875 assert(is_whitespace_normalized(XCS("abc"), 0)); 876 assert(is_whitespace_normalized(XCS("abc"), 1)); 877 assert(is_whitespace_normalized(XCS("abc def ghi"), 0)); 878 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 879 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0)); 880 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1)); 881 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0)); 882 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 883 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0)); 884 assert(is_whitespace_normalized(XCS("abc def ghi "), 1)); 885 assert(! is_whitespace_normalized(XCS(" "), 0)); 886 assert(is_whitespace_normalized(XCS(" "), 1)); 887 assert(! is_whitespace_normalized(XCS("\t"), 0)); 888 assert(! is_whitespace_normalized(XCS("\t"), 1)); 889 assert(! is_whitespace_normalized(XCS("\n"), 0)); 890 assert(! is_whitespace_normalized(XCS("\n"), 1)); 891 assert(! is_whitespace_normalized(XCS("\r"), 0)); 892 assert(! is_whitespace_normalized(XCS("\r"), 1)); 893 assert(! is_whitespace_normalized(XCS("abc\t def"), 1)); 894 } 895 END_TEST 896 897 static void XMLCALL 898 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, 899 const XML_Char **atts) { 900 int i; 901 UNUSED_P(userData); 902 UNUSED_P(name); 903 for (i = 0; atts[i] != NULL; i += 2) { 904 const XML_Char *attrname = atts[i]; 905 const XML_Char *value = atts[i + 1]; 906 if (xcstrcmp(XCS("attr"), attrname) == 0 907 || xcstrcmp(XCS("ents"), attrname) == 0 908 || xcstrcmp(XCS("refs"), attrname) == 0) { 909 if (! is_whitespace_normalized(value, 0)) { 910 char buffer[256]; 911 snprintf(buffer, sizeof(buffer), 912 "attribute value not normalized: %" XML_FMT_STR 913 "='%" XML_FMT_STR "'", 914 attrname, value); 915 fail(buffer); 916 } 917 } 918 } 919 } 920 921 START_TEST(test_attr_whitespace_normalization) { 922 const char *text 923 = "<!DOCTYPE doc [\n" 924 " <!ATTLIST doc\n" 925 " attr NMTOKENS #REQUIRED\n" 926 " ents ENTITIES #REQUIRED\n" 927 " refs IDREFS #REQUIRED>\n" 928 "]>\n" 929 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n" 930 " ents=' ent-1 \t\r\n" 931 " ent-2 ' >\n" 932 " <e id='id-1'/>\n" 933 " <e id='id-2'/>\n" 934 "</doc>"; 935 936 XML_SetStartElementHandler(g_parser, 937 check_attr_contains_normalized_whitespace); 938 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 939 == XML_STATUS_ERROR) 940 xml_failure(g_parser); 941 } 942 END_TEST 943 944 /* 945 * XML declaration tests. 946 */ 947 948 START_TEST(test_xmldecl_misplaced) { 949 expect_failure("\n" 950 "<?xml version='1.0'?>\n" 951 "<a/>", 952 XML_ERROR_MISPLACED_XML_PI, 953 "failed to report misplaced XML declaration"); 954 } 955 END_TEST 956 957 START_TEST(test_xmldecl_invalid) { 958 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL, 959 "Failed to report invalid XML declaration"); 960 } 961 END_TEST 962 963 START_TEST(test_xmldecl_missing_attr) { 964 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL, 965 "Failed to report missing XML declaration attribute"); 966 } 967 END_TEST 968 969 START_TEST(test_xmldecl_missing_value) { 970 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n" 971 "<doc/>", 972 XML_ERROR_XML_DECL, 973 "Failed to report missing attribute value"); 974 } 975 END_TEST 976 977 /* Regression test for SF bug #584832. */ 978 START_TEST(test_unknown_encoding_internal_entity) { 979 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 980 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 981 "<test a='&foo;'/>"; 982 983 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL); 984 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 985 == XML_STATUS_ERROR) 986 xml_failure(g_parser); 987 } 988 END_TEST 989 990 /* Test unrecognised encoding handler */ 991 START_TEST(test_unrecognised_encoding_internal_entity) { 992 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 993 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 994 "<test a='&foo;'/>"; 995 996 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL); 997 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 998 != XML_STATUS_ERROR) 999 fail("Unrecognised encoding not rejected"); 1000 } 1001 END_TEST 1002 1003 /* Regression test for SF bug #620106. */ 1004 START_TEST(test_ext_entity_set_encoding) { 1005 const char *text = "<!DOCTYPE doc [\n" 1006 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1007 "]>\n" 1008 "<doc>&en;</doc>"; 1009 ExtTest test_data 1010 = {/* This text says it's an unsupported encoding, but it's really 1011 UTF-8, which we tell Expat using XML_SetEncoding(). 1012 */ 1013 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL}; 1014 #ifdef XML_UNICODE 1015 const XML_Char *expected = XCS("\x00e9"); 1016 #else 1017 const XML_Char *expected = XCS("\xc3\xa9"); 1018 #endif 1019 1020 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1021 run_ext_character_check(text, &test_data, expected); 1022 } 1023 END_TEST 1024 1025 /* Test external entities with no handler */ 1026 START_TEST(test_ext_entity_no_handler) { 1027 const char *text = "<!DOCTYPE doc [\n" 1028 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1029 "]>\n" 1030 "<doc>&en;</doc>"; 1031 1032 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1033 run_character_check(text, XCS("")); 1034 } 1035 END_TEST 1036 1037 /* Test UTF-8 BOM is accepted */ 1038 START_TEST(test_ext_entity_set_bom) { 1039 const char *text = "<!DOCTYPE doc [\n" 1040 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1041 "]>\n" 1042 "<doc>&en;</doc>"; 1043 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */ 1044 "<?xml encoding='iso-8859-3'?>" 1045 "\xC3\xA9", 1046 XCS("utf-8"), NULL}; 1047 #ifdef XML_UNICODE 1048 const XML_Char *expected = XCS("\x00e9"); 1049 #else 1050 const XML_Char *expected = XCS("\xc3\xa9"); 1051 #endif 1052 1053 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1054 run_ext_character_check(text, &test_data, expected); 1055 } 1056 END_TEST 1057 1058 /* Test that bad encodings are faulted */ 1059 START_TEST(test_ext_entity_bad_encoding) { 1060 const char *text = "<!DOCTYPE doc [\n" 1061 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1062 "]>\n" 1063 "<doc>&en;</doc>"; 1064 ExtFaults fault 1065 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted", 1066 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING}; 1067 1068 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1069 XML_SetUserData(g_parser, &fault); 1070 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1071 "Bad encoding should not have been accepted"); 1072 } 1073 END_TEST 1074 1075 /* Try handing an invalid encoding to an external entity parser */ 1076 START_TEST(test_ext_entity_bad_encoding_2) { 1077 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1078 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1079 "<doc>&entity;</doc>"; 1080 ExtFaults fault 1081 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted", 1082 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING}; 1083 1084 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1085 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1086 XML_SetUserData(g_parser, &fault); 1087 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1088 "Bad encoding not faulted in external entity handler"); 1089 } 1090 END_TEST 1091 1092 /* Test that no error is reported for unknown entities if we don't 1093 read an external subset. This was fixed in Expat 1.95.5. 1094 */ 1095 START_TEST(test_wfc_undeclared_entity_unread_external_subset) { 1096 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 1097 "<doc>&entity;</doc>"; 1098 1099 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1100 == XML_STATUS_ERROR) 1101 xml_failure(g_parser); 1102 } 1103 END_TEST 1104 1105 /* Test that an error is reported for unknown entities if we don't 1106 have an external subset. 1107 */ 1108 START_TEST(test_wfc_undeclared_entity_no_external_subset) { 1109 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY, 1110 "Parser did not report undefined entity w/out a DTD."); 1111 } 1112 END_TEST 1113 1114 /* Test that an error is reported for unknown entities if we don't 1115 read an external subset, but have been declared standalone. 1116 */ 1117 START_TEST(test_wfc_undeclared_entity_standalone) { 1118 const char *text 1119 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1120 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1121 "<doc>&entity;</doc>"; 1122 1123 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1124 "Parser did not report undefined entity (standalone)."); 1125 } 1126 END_TEST 1127 1128 /* Test that an error is reported for unknown entities if we have read 1129 an external subset, and standalone is true. 1130 */ 1131 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { 1132 const char *text 1133 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1134 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1135 "<doc>&entity;</doc>"; 1136 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1137 1138 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1139 XML_SetUserData(g_parser, &test_data); 1140 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1141 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1142 "Parser did not report undefined entity (external DTD)."); 1143 } 1144 END_TEST 1145 1146 /* Test that external entity handling is not done if the parsing flag 1147 * is set to UNLESS_STANDALONE 1148 */ 1149 START_TEST(test_entity_with_external_subset_unless_standalone) { 1150 const char *text 1151 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1152 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1153 "<doc>&entity;</doc>"; 1154 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL}; 1155 1156 XML_SetParamEntityParsing(g_parser, 1157 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1158 XML_SetUserData(g_parser, &test_data); 1159 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1160 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1161 "Parser did not report undefined entity"); 1162 } 1163 END_TEST 1164 1165 /* Test that no error is reported for unknown entities if we have read 1166 an external subset, and standalone is false. 1167 */ 1168 START_TEST(test_wfc_undeclared_entity_with_external_subset) { 1169 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1170 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1171 "<doc>&entity;</doc>"; 1172 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1173 1174 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1175 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1176 run_ext_character_check(text, &test_data, XCS("")); 1177 } 1178 END_TEST 1179 1180 /* Test that an error is reported if our NotStandalone handler fails */ 1181 START_TEST(test_not_standalone_handler_reject) { 1182 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1183 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1184 "<doc>&entity;</doc>"; 1185 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1186 1187 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1188 XML_SetUserData(g_parser, &test_data); 1189 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1190 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1191 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1192 "NotStandalone handler failed to reject"); 1193 1194 /* Try again but without external entity handling */ 1195 XML_ParserReset(g_parser, NULL); 1196 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1197 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1198 "NotStandalone handler failed to reject"); 1199 } 1200 END_TEST 1201 1202 /* Test that no error is reported if our NotStandalone handler succeeds */ 1203 START_TEST(test_not_standalone_handler_accept) { 1204 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1205 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1206 "<doc>&entity;</doc>"; 1207 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1208 1209 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1210 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1211 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1212 run_ext_character_check(text, &test_data, XCS("")); 1213 1214 /* Repeat without the external entity handler */ 1215 XML_ParserReset(g_parser, NULL); 1216 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1217 run_character_check(text, XCS("")); 1218 } 1219 END_TEST 1220 1221 START_TEST(test_entity_start_tag_level_greater_than_one) { 1222 const char *const text = "<!DOCTYPE t1 [\n" 1223 " <!ENTITY e1 'hello'>\n" 1224 "]>\n" 1225 "<t1>\n" 1226 " <t2>&e1;</t2>\n" 1227 "</t1>\n"; 1228 1229 XML_Parser parser = XML_ParserCreate(NULL); 1230 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 1231 /*isFinal*/ XML_TRUE) 1232 == XML_STATUS_OK); 1233 XML_ParserFree(parser); 1234 } 1235 END_TEST 1236 1237 START_TEST(test_wfc_no_recursive_entity_refs) { 1238 const char *text = "<!DOCTYPE doc [\n" 1239 " <!ENTITY entity '&entity;'>\n" 1240 "]>\n" 1241 "<doc>&entity;</doc>"; 1242 1243 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, 1244 "Parser did not report recursive entity reference."); 1245 } 1246 END_TEST 1247 1248 START_TEST(test_no_indirectly_recursive_entity_refs) { 1249 struct TestCase { 1250 const char *doc; 1251 bool usesParameterEntities; 1252 }; 1253 1254 const struct TestCase cases[] = { 1255 // general entity + character data 1256 {"<!DOCTYPE a [\n" 1257 " <!ENTITY e1 '&e2;'>\n" 1258 " <!ENTITY e2 '&e1;'>\n" 1259 "]><a>&e2;</a>\n", 1260 false}, 1261 1262 // general entity + attribute value 1263 {"<!DOCTYPE a [\n" 1264 " <!ENTITY e1 '&e2;'>\n" 1265 " <!ENTITY e2 '&e1;'>\n" 1266 "]><a k1='&e2;' />\n", 1267 false}, 1268 1269 // parameter entity 1270 {"<!DOCTYPE doc [\n" 1271 " <!ENTITY % p1 '%p2;'>\n" 1272 " <!ENTITY % p2 '%p1;'>\n" 1273 " <!ENTITY % define_g \"<!ENTITY g '%p2;'>\">\n" 1274 " %define_g;\n" 1275 "]>\n" 1276 "<doc/>\n", 1277 true}, 1278 }; 1279 const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE}; 1280 1281 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1282 for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]); 1283 j++) { 1284 const XML_Bool reset_wanted = reset_or_not[j]; 1285 const char *const doc = cases[i].doc; 1286 const bool usesParameterEntities = cases[i].usesParameterEntities; 1287 1288 set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc); 1289 1290 #ifdef XML_DTD // both GE and DTD 1291 const bool rejection_expected = true; 1292 #elif XML_GE == 1 // GE but not DTD 1293 const bool rejection_expected = ! usesParameterEntities; 1294 #else // neither DTD nor GE 1295 const bool rejection_expected = false; 1296 #endif 1297 1298 XML_Parser parser = XML_ParserCreate(NULL); 1299 1300 #ifdef XML_DTD 1301 if (usesParameterEntities) { 1302 assert_true( 1303 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS) 1304 == 1); 1305 } 1306 #else 1307 UNUSED_P(usesParameterEntities); 1308 #endif // XML_DTD 1309 1310 const enum XML_Status status 1311 = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), 1312 /*isFinal*/ XML_TRUE); 1313 1314 if (rejection_expected) { 1315 assert_true(status == XML_STATUS_ERROR); 1316 assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF); 1317 } else { 1318 assert_true(status == XML_STATUS_OK); 1319 } 1320 1321 if (reset_wanted) { 1322 // This covers free'ing of (eventually) all three open entity lists by 1323 // XML_ParserReset. 1324 XML_ParserReset(parser, NULL); 1325 } 1326 1327 // This covers free'ing of (eventually) all three open entity lists by 1328 // XML_ParserFree (unless XML_ParserReset has already done that above). 1329 XML_ParserFree(parser); 1330 } 1331 } 1332 } 1333 END_TEST 1334 1335 START_TEST(test_recursive_external_parameter_entity_2) { 1336 struct TestCase { 1337 const char *doc; 1338 enum XML_Status expectedStatus; 1339 }; 1340 1341 struct TestCase cases[] = { 1342 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR}, 1343 {"<!ENTITY % p1 '%p1;'>" 1344 "<!ENTITY % p1 'first declaration wins'>", 1345 XML_STATUS_ERROR}, 1346 {"<!ENTITY % p1 'first declaration wins'>" 1347 "<!ENTITY % p1 '%p1;'>", 1348 XML_STATUS_OK}, 1349 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK}, 1350 }; 1351 1352 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1353 const char *const doc = cases[i].doc; 1354 const enum XML_Status expectedStatus = cases[i].expectedStatus; 1355 set_subtest("%s", doc); 1356 1357 XML_Parser parser = XML_ParserCreate(NULL); 1358 assert_true(parser != NULL); 1359 1360 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 1361 assert_true(ext_parser != NULL); 1362 1363 const enum XML_Status actualStatus 1364 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); 1365 1366 assert_true(actualStatus == expectedStatus); 1367 if (actualStatus != XML_STATUS_OK) { 1368 assert_true(XML_GetErrorCode(ext_parser) 1369 == XML_ERROR_RECURSIVE_ENTITY_REF); 1370 } 1371 1372 XML_ParserFree(ext_parser); 1373 XML_ParserFree(parser); 1374 } 1375 } 1376 END_TEST 1377 1378 /* Test incomplete external entities are faulted */ 1379 START_TEST(test_ext_entity_invalid_parse) { 1380 const char *text = "<!DOCTYPE doc [\n" 1381 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1382 "]>\n" 1383 "<doc>&en;</doc>"; 1384 const ExtFaults faults[] 1385 = {{"<", "Incomplete element declaration not faulted", NULL, 1386 XML_ERROR_UNCLOSED_TOKEN}, 1387 {"<\xe2\x82", /* First two bytes of a three-byte char */ 1388 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 1389 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL, 1390 XML_ERROR_PARTIAL_CHAR}, 1391 {NULL, NULL, NULL, XML_ERROR_NONE}}; 1392 const ExtFaults *fault = faults; 1393 1394 for (; fault->parse_text != NULL; fault++) { 1395 set_subtest("\"%s\"", fault->parse_text); 1396 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1397 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1398 XML_SetUserData(g_parser, (void *)fault); 1399 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1400 "Parser did not report external entity error"); 1401 XML_ParserReset(g_parser, NULL); 1402 } 1403 } 1404 END_TEST 1405 1406 /* Regression test for SF bug #483514. */ 1407 START_TEST(test_dtd_default_handling) { 1408 const char *text = "<!DOCTYPE doc [\n" 1409 "<!ENTITY e SYSTEM 'http://example.org/e'>\n" 1410 "<!NOTATION n SYSTEM 'http://example.org/n'>\n" 1411 "<!ELEMENT doc EMPTY>\n" 1412 "<!ATTLIST doc a CDATA #IMPLIED>\n" 1413 "<?pi in dtd?>\n" 1414 "<!--comment in dtd-->\n" 1415 "]><doc/>"; 1416 1417 XML_SetDefaultHandler(g_parser, accumulate_characters); 1418 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 1419 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 1420 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 1421 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 1422 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 1423 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 1424 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); 1425 XML_SetCommentHandler(g_parser, dummy_comment_handler); 1426 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1427 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1428 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>")); 1429 } 1430 END_TEST 1431 1432 /* Test handling of attribute declarations */ 1433 START_TEST(test_dtd_attr_handling) { 1434 const char *prolog = "<!DOCTYPE doc [\n" 1435 "<!ELEMENT doc EMPTY>\n"; 1436 AttTest attr_data[] 1437 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n" 1438 "]>" 1439 "<doc a='two'/>", 1440 XCS("doc"), XCS("a"), 1441 XCS("(one|two|three)"), /* Extraneous spaces will be removed */ 1442 NULL, XML_TRUE}, 1443 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n" 1444 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n" 1445 "]>" 1446 "<doc/>", 1447 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE}, 1448 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n" 1449 "]>" 1450 "<doc/>", 1451 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE}, 1452 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n" 1453 "]>" 1454 "<doc/>", 1455 XCS("doc"), XCS("a"), XCS("CDATA"), 1456 #ifdef XML_UNICODE 1457 XCS("\x06f2"), 1458 #else 1459 XCS("\xdb\xb2"), 1460 #endif 1461 XML_FALSE}, 1462 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}}; 1463 AttTest *test; 1464 1465 for (test = attr_data; test->definition != NULL; test++) { 1466 set_subtest("%s", test->definition); 1467 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); 1468 XML_SetUserData(g_parser, test); 1469 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), 1470 XML_FALSE) 1471 == XML_STATUS_ERROR) 1472 xml_failure(g_parser); 1473 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition, 1474 (int)strlen(test->definition), XML_TRUE) 1475 == XML_STATUS_ERROR) 1476 xml_failure(g_parser); 1477 XML_ParserReset(g_parser, NULL); 1478 } 1479 } 1480 END_TEST 1481 1482 /* See related SF bug #673791. 1483 When namespace processing is enabled, setting the namespace URI for 1484 a prefix is not allowed; this test ensures that it *is* allowed 1485 when namespace processing is not enabled. 1486 (See Namespaces in XML, section 2.) 1487 */ 1488 START_TEST(test_empty_ns_without_namespaces) { 1489 const char *text = "<doc xmlns:prefix='http://example.org/'>\n" 1490 " <e xmlns:prefix=''/>\n" 1491 "</doc>"; 1492 1493 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1494 == XML_STATUS_ERROR) 1495 xml_failure(g_parser); 1496 } 1497 END_TEST 1498 1499 /* Regression test for SF bug #824420. 1500 Checks that an xmlns:prefix attribute set in an attribute's default 1501 value isn't misinterpreted. 1502 */ 1503 START_TEST(test_ns_in_attribute_default_without_namespaces) { 1504 const char *text = "<!DOCTYPE e:element [\n" 1505 " <!ATTLIST e:element\n" 1506 " xmlns:e CDATA 'http://example.org/'>\n" 1507 " ]>\n" 1508 "<e:element/>"; 1509 1510 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1511 == XML_STATUS_ERROR) 1512 xml_failure(g_parser); 1513 } 1514 END_TEST 1515 1516 /* Regression test for SF bug #1515266: missing check of stopped 1517 parser in doContext() 'for' loop. */ 1518 START_TEST(test_stop_parser_between_char_data_calls) { 1519 /* The sample data must be big enough that there are two calls to 1520 the character data handler from within the inner "for" loop of 1521 the XML_TOK_DATA_CHARS case in doContent(), and the character 1522 handler must stop the parser and clear the character data 1523 handler. 1524 */ 1525 const char *text = long_character_data_text; 1526 1527 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1528 g_resumable = XML_FALSE; 1529 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1530 != XML_STATUS_ERROR) 1531 xml_failure(g_parser); 1532 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 1533 xml_failure(g_parser); 1534 } 1535 END_TEST 1536 1537 /* Regression test for SF bug #1515266: missing check of stopped 1538 parser in doContext() 'for' loop. */ 1539 START_TEST(test_suspend_parser_between_char_data_calls) { 1540 /* The sample data must be big enough that there are two calls to 1541 the character data handler from within the inner "for" loop of 1542 the XML_TOK_DATA_CHARS case in doContent(), and the character 1543 handler must stop the parser and clear the character data 1544 handler. 1545 */ 1546 const char *text = long_character_data_text; 1547 1548 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1549 g_resumable = XML_TRUE; 1550 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1551 // we won't know exactly how much input we actually managed to give Expat. 1552 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 1553 != XML_STATUS_SUSPENDED) 1554 xml_failure(g_parser); 1555 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1556 xml_failure(g_parser); 1557 /* Try parsing directly */ 1558 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1559 != XML_STATUS_ERROR) 1560 fail("Attempt to continue parse while suspended not faulted"); 1561 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 1562 fail("Suspended parse not faulted with correct error"); 1563 } 1564 END_TEST 1565 1566 /* Test repeated calls to XML_StopParser are handled correctly */ 1567 START_TEST(test_repeated_stop_parser_between_char_data_calls) { 1568 const char *text = long_character_data_text; 1569 1570 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1571 g_resumable = XML_FALSE; 1572 g_abortable = XML_FALSE; 1573 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1574 != XML_STATUS_ERROR) 1575 fail("Failed to double-stop parser"); 1576 1577 XML_ParserReset(g_parser, NULL); 1578 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1579 g_resumable = XML_TRUE; 1580 g_abortable = XML_FALSE; 1581 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1582 // we won't know exactly how much input we actually managed to give Expat. 1583 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 1584 != XML_STATUS_SUSPENDED) 1585 fail("Failed to double-suspend parser"); 1586 1587 XML_ParserReset(g_parser, NULL); 1588 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1589 g_resumable = XML_TRUE; 1590 g_abortable = XML_TRUE; 1591 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1592 != XML_STATUS_ERROR) 1593 fail("Failed to suspend-abort parser"); 1594 } 1595 END_TEST 1596 1597 START_TEST(test_good_cdata_ascii) { 1598 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>"; 1599 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>"); 1600 1601 CharData storage; 1602 CharData_Init(&storage); 1603 XML_SetUserData(g_parser, &storage); 1604 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1605 /* Add start and end handlers for coverage */ 1606 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1607 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1608 1609 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1610 == XML_STATUS_ERROR) 1611 xml_failure(g_parser); 1612 CharData_CheckXMLChars(&storage, expected); 1613 1614 /* Try again, this time with a default handler */ 1615 XML_ParserReset(g_parser, NULL); 1616 CharData_Init(&storage); 1617 XML_SetUserData(g_parser, &storage); 1618 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1619 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1620 1621 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1622 == XML_STATUS_ERROR) 1623 xml_failure(g_parser); 1624 CharData_CheckXMLChars(&storage, expected); 1625 } 1626 END_TEST 1627 1628 START_TEST(test_good_cdata_utf16) { 1629 /* Test data is: 1630 * <?xml version='1.0' encoding='utf-16'?> 1631 * <a><![CDATA[hello]]></a> 1632 */ 1633 const char text[] 1634 = "\0<\0?\0x\0m\0l\0" 1635 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1636 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1637 "1\0" 1638 "6\0'" 1639 "\0?\0>\0\n" 1640 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; 1641 const XML_Char *expected = XCS("hello"); 1642 1643 CharData storage; 1644 CharData_Init(&storage); 1645 XML_SetUserData(g_parser, &storage); 1646 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1647 1648 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1649 == XML_STATUS_ERROR) 1650 xml_failure(g_parser); 1651 CharData_CheckXMLChars(&storage, expected); 1652 } 1653 END_TEST 1654 1655 START_TEST(test_good_cdata_utf16_le) { 1656 /* Test data is: 1657 * <?xml version='1.0' encoding='utf-16'?> 1658 * <a><![CDATA[hello]]></a> 1659 */ 1660 const char text[] 1661 = "<\0?\0x\0m\0l\0" 1662 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1663 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1664 "1\0" 1665 "6\0'" 1666 "\0?\0>\0\n" 1667 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; 1668 const XML_Char *expected = XCS("hello"); 1669 1670 CharData storage; 1671 CharData_Init(&storage); 1672 XML_SetUserData(g_parser, &storage); 1673 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1674 1675 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1676 == XML_STATUS_ERROR) 1677 xml_failure(g_parser); 1678 CharData_CheckXMLChars(&storage, expected); 1679 } 1680 END_TEST 1681 1682 /* Test UTF16 conversion of a long cdata string */ 1683 1684 /* 16 characters: handy macro to reduce visual clutter */ 1685 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" 1686 1687 START_TEST(test_long_cdata_utf16) { 1688 /* Test data is: 1689 * <?xlm version='1.0' encoding='utf-16'?> 1690 * <a><![CDATA[ 1691 * ABCDEFGHIJKLMNOP 1692 * ]]></a> 1693 */ 1694 const char text[] 1695 = "\0<\0?\0x\0m\0l\0 " 1696 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " 1697 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" 1698 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1699 /* 64 characters per line */ 1700 /* clang-format off */ 1701 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1702 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1703 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1704 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1705 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1706 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1707 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1708 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1709 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1710 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1711 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1712 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1713 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1714 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1715 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1716 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1717 A_TO_P_IN_UTF16 1718 /* clang-format on */ 1719 "\0]\0]\0>\0<\0/\0a\0>"; 1720 const XML_Char *expected = 1721 /* clang-format off */ 1722 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1723 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1724 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1725 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1726 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1727 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1728 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1729 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1730 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1731 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1732 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1733 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1734 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1735 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1736 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1737 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1738 XCS("ABCDEFGHIJKLMNOP"); 1739 /* clang-format on */ 1740 CharData storage; 1741 void *buffer; 1742 1743 CharData_Init(&storage); 1744 XML_SetUserData(g_parser, &storage); 1745 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1746 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1); 1747 if (buffer == NULL) 1748 fail("Could not allocate parse buffer"); 1749 assert(buffer != NULL); 1750 memcpy(buffer, text, sizeof(text) - 1); 1751 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) 1752 xml_failure(g_parser); 1753 CharData_CheckXMLChars(&storage, expected); 1754 } 1755 END_TEST 1756 1757 /* Test handling of multiple unit UTF-16 characters */ 1758 START_TEST(test_multichar_cdata_utf16) { 1759 /* Test data is: 1760 * <?xml version='1.0' encoding='utf-16'?> 1761 * <a><![CDATA[{MINIM}{CROTCHET}]]></a> 1762 * 1763 * where {MINIM} is U+1d15e (a minim or half-note) 1764 * UTF-16: 0xd834 0xdd5e 1765 * UTF-8: 0xf0 0x9d 0x85 0x9e 1766 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) 1767 * UTF-16: 0xd834 0xdd5f 1768 * UTF-8: 0xf0 0x9d 0x85 0x9f 1769 */ 1770 const char text[] = "\0<\0?\0x\0m\0l\0" 1771 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1772 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1773 "1\0" 1774 "6\0'" 1775 "\0?\0>\0\n" 1776 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1777 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" 1778 "\0]\0]\0>\0<\0/\0a\0>"; 1779 #ifdef XML_UNICODE 1780 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); 1781 #else 1782 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); 1783 #endif 1784 CharData storage; 1785 1786 CharData_Init(&storage); 1787 XML_SetUserData(g_parser, &storage); 1788 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1789 1790 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1791 == XML_STATUS_ERROR) 1792 xml_failure(g_parser); 1793 CharData_CheckXMLChars(&storage, expected); 1794 } 1795 END_TEST 1796 1797 /* Test that an element name with a UTF-16 surrogate pair is rejected */ 1798 START_TEST(test_utf16_bad_surrogate_pair) { 1799 /* Test data is: 1800 * <?xml version='1.0' encoding='utf-16'?> 1801 * <a><![CDATA[{BADLINB}]]></a> 1802 * 1803 * where {BADLINB} is U+10000 (the first Linear B character) 1804 * with the UTF-16 surrogate pair in the wrong order, i.e. 1805 * 0xdc00 0xd800 1806 */ 1807 const char text[] = "\0<\0?\0x\0m\0l\0" 1808 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1809 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1810 "1\0" 1811 "6\0'" 1812 "\0?\0>\0\n" 1813 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1814 "\xdc\x00\xd8\x00" 1815 "\0]\0]\0>\0<\0/\0a\0>"; 1816 1817 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1818 != XML_STATUS_ERROR) 1819 fail("Reversed UTF-16 surrogate pair not faulted"); 1820 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 1821 xml_failure(g_parser); 1822 } 1823 END_TEST 1824 1825 START_TEST(test_bad_cdata) { 1826 struct CaseData { 1827 const char *text; 1828 enum XML_Error expectedError; 1829 }; 1830 1831 struct CaseData cases[] 1832 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN}, 1833 {"<a><!", XML_ERROR_UNCLOSED_TOKEN}, 1834 {"<a><![", XML_ERROR_UNCLOSED_TOKEN}, 1835 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN}, 1836 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN}, 1837 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN}, 1838 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN}, 1839 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN}, 1840 1841 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1842 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1843 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1844 1845 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN}, 1846 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1847 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1848 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN}, 1849 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN}, 1850 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN}, 1851 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN}, 1852 1853 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1854 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1855 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1856 1857 size_t i = 0; 1858 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1859 set_subtest("%s", cases[i].text); 1860 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( 1861 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); 1862 const enum XML_Error actualError = XML_GetErrorCode(g_parser); 1863 1864 assert(actualStatus == XML_STATUS_ERROR); 1865 1866 if (actualError != cases[i].expectedError) { 1867 char message[100]; 1868 snprintf(message, sizeof(message), 1869 "Expected error %d but got error %d for case %u: \"%s\"\n", 1870 cases[i].expectedError, actualError, (unsigned int)i + 1, 1871 cases[i].text); 1872 fail(message); 1873 } 1874 1875 XML_ParserReset(g_parser, NULL); 1876 } 1877 } 1878 END_TEST 1879 1880 /* Test failures in UTF-16 CDATA */ 1881 START_TEST(test_bad_cdata_utf16) { 1882 struct CaseData { 1883 size_t text_bytes; 1884 const char *text; 1885 enum XML_Error expected_error; 1886 }; 1887 1888 const char prolog[] = "\0<\0?\0x\0m\0l\0" 1889 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1890 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1891 "1\0" 1892 "6\0'" 1893 "\0?\0>\0\n" 1894 "\0<\0a\0>"; 1895 struct CaseData cases[] = { 1896 {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, 1897 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, 1898 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, 1899 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, 1900 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, 1901 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, 1902 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, 1903 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, 1904 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN}, 1905 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, 1906 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, 1907 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, 1908 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1909 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, 1910 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, 1911 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, 1912 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1913 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1914 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1915 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1916 /* Now add a four-byte UTF-16 character */ 1917 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", 1918 XML_ERROR_UNCLOSED_CDATA_SECTION}, 1919 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR}, 1920 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", 1921 XML_ERROR_PARTIAL_CHAR}, 1922 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", 1923 XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1924 size_t i; 1925 1926 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1927 set_subtest("case %lu", (long unsigned)(i + 1)); 1928 enum XML_Status actual_status; 1929 enum XML_Error actual_error; 1930 1931 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1, 1932 XML_FALSE) 1933 == XML_STATUS_ERROR) 1934 xml_failure(g_parser); 1935 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text, 1936 (int)cases[i].text_bytes, XML_TRUE); 1937 assert(actual_status == XML_STATUS_ERROR); 1938 actual_error = XML_GetErrorCode(g_parser); 1939 if (actual_error != cases[i].expected_error) { 1940 char message[1024]; 1941 1942 snprintf(message, sizeof(message), 1943 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR 1944 ") for case %lu\n", 1945 cases[i].expected_error, 1946 XML_ErrorString(cases[i].expected_error), actual_error, 1947 XML_ErrorString(actual_error), (long unsigned)(i + 1)); 1948 fail(message); 1949 } 1950 XML_ParserReset(g_parser, NULL); 1951 } 1952 } 1953 END_TEST 1954 1955 /* Test stopping the parser in cdata handler */ 1956 START_TEST(test_stop_parser_between_cdata_calls) { 1957 const char *text = long_cdata_text; 1958 1959 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1960 g_resumable = XML_FALSE; 1961 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler"); 1962 } 1963 END_TEST 1964 1965 /* Test suspending the parser in cdata handler */ 1966 START_TEST(test_suspend_parser_between_cdata_calls) { 1967 if (g_chunkSize != 0) { 1968 // this test does not use SINGLE_BYTES, because of suspension 1969 return; 1970 } 1971 1972 const char *text = long_cdata_text; 1973 enum XML_Status result; 1974 1975 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1976 g_resumable = XML_TRUE; 1977 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1978 // we won't know exactly how much input we actually managed to give Expat. 1979 result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE); 1980 if (result != XML_STATUS_SUSPENDED) { 1981 if (result == XML_STATUS_ERROR) 1982 xml_failure(g_parser); 1983 fail("Parse not suspended in CDATA handler"); 1984 } 1985 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1986 xml_failure(g_parser); 1987 } 1988 END_TEST 1989 1990 /* Test memory allocation functions */ 1991 START_TEST(test_memory_allocation) { 1992 char *buffer = (char *)XML_MemMalloc(g_parser, 256); 1993 char *p; 1994 1995 if (buffer == NULL) { 1996 fail("Allocation failed"); 1997 } else { 1998 /* Try writing to memory; some OSes try to cheat! */ 1999 buffer[0] = 'T'; 2000 buffer[1] = 'E'; 2001 buffer[2] = 'S'; 2002 buffer[3] = 'T'; 2003 buffer[4] = '\0'; 2004 if (strcmp(buffer, "TEST") != 0) { 2005 fail("Memory not writable"); 2006 } else { 2007 p = (char *)XML_MemRealloc(g_parser, buffer, 512); 2008 if (p == NULL) { 2009 fail("Reallocation failed"); 2010 } else { 2011 /* Write again, just to be sure */ 2012 buffer = p; 2013 buffer[0] = 'V'; 2014 if (strcmp(buffer, "VEST") != 0) { 2015 fail("Reallocated memory not writable"); 2016 } 2017 } 2018 } 2019 XML_MemFree(g_parser, buffer); 2020 } 2021 } 2022 END_TEST 2023 2024 /* Test XML_DefaultCurrent() passes handling on correctly */ 2025 START_TEST(test_default_current) { 2026 const char *text = "<doc>hell]</doc>"; 2027 const char *entity_text = "<!DOCTYPE doc [\n" 2028 "<!ENTITY entity '%'>\n" 2029 "]>\n" 2030 "<doc>&entity;</doc>"; 2031 2032 set_subtest("with defaulting"); 2033 { 2034 struct handler_record_list storage; 2035 storage.count = 0; 2036 XML_SetDefaultHandler(g_parser, record_default_handler); 2037 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2038 XML_SetUserData(g_parser, &storage); 2039 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2040 == XML_STATUS_ERROR) 2041 xml_failure(g_parser); 2042 int i = 0; 2043 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 2044 // we should have gotten one or more cdata callbacks, totaling 5 chars 2045 int cdata_len_remaining = 5; 2046 while (cdata_len_remaining > 0) { 2047 const struct handler_record_entry *c_entry 2048 = handler_record_get(&storage, i++); 2049 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0); 2050 assert_true(c_entry->arg > 0); 2051 assert_true(c_entry->arg <= cdata_len_remaining); 2052 cdata_len_remaining -= c_entry->arg; 2053 // default handler must follow, with the exact same len argument. 2054 assert_record_handler_called(&storage, i++, "record_default_handler", 2055 c_entry->arg); 2056 } 2057 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 2058 assert_true(storage.count == i); 2059 } 2060 2061 /* Again, without the defaulting */ 2062 set_subtest("no defaulting"); 2063 { 2064 struct handler_record_list storage; 2065 storage.count = 0; 2066 XML_ParserReset(g_parser, NULL); 2067 XML_SetDefaultHandler(g_parser, record_default_handler); 2068 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2069 XML_SetUserData(g_parser, &storage); 2070 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2071 == XML_STATUS_ERROR) 2072 xml_failure(g_parser); 2073 int i = 0; 2074 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 2075 // we should have gotten one or more cdata callbacks, totaling 5 chars 2076 int cdata_len_remaining = 5; 2077 while (cdata_len_remaining > 0) { 2078 const struct handler_record_entry *c_entry 2079 = handler_record_get(&storage, i++); 2080 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); 2081 assert_true(c_entry->arg > 0); 2082 assert_true(c_entry->arg <= cdata_len_remaining); 2083 cdata_len_remaining -= c_entry->arg; 2084 } 2085 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 2086 assert_true(storage.count == i); 2087 } 2088 2089 /* Now with an internal entity to complicate matters */ 2090 set_subtest("with internal entity"); 2091 { 2092 struct handler_record_list storage; 2093 storage.count = 0; 2094 XML_ParserReset(g_parser, NULL); 2095 XML_SetDefaultHandler(g_parser, record_default_handler); 2096 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2097 XML_SetUserData(g_parser, &storage); 2098 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2099 XML_TRUE) 2100 == XML_STATUS_ERROR) 2101 xml_failure(g_parser); 2102 /* The default handler suppresses the entity */ 2103 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2104 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2105 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2106 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2107 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2108 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2109 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2110 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2111 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2112 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2113 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2114 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2115 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2116 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2117 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2118 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2119 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2120 assert_record_handler_called(&storage, 17, "record_default_handler", 8); 2121 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2122 assert_true(storage.count == 19); 2123 } 2124 2125 /* Again, with a skip handler */ 2126 set_subtest("with skip handler"); 2127 { 2128 struct handler_record_list storage; 2129 storage.count = 0; 2130 XML_ParserReset(g_parser, NULL); 2131 XML_SetDefaultHandler(g_parser, record_default_handler); 2132 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2133 XML_SetSkippedEntityHandler(g_parser, record_skip_handler); 2134 XML_SetUserData(g_parser, &storage); 2135 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2136 XML_TRUE) 2137 == XML_STATUS_ERROR) 2138 xml_failure(g_parser); 2139 /* The default handler suppresses the entity */ 2140 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2141 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2142 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2143 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2144 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2145 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2146 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2147 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2148 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2149 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2150 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2151 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2152 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2153 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2154 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2155 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2156 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2157 assert_record_handler_called(&storage, 17, "record_skip_handler", 0); 2158 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2159 assert_true(storage.count == 19); 2160 } 2161 2162 /* This time, allow the entity through */ 2163 set_subtest("allow entity"); 2164 { 2165 struct handler_record_list storage; 2166 storage.count = 0; 2167 XML_ParserReset(g_parser, NULL); 2168 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2169 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2170 XML_SetUserData(g_parser, &storage); 2171 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2172 XML_TRUE) 2173 == XML_STATUS_ERROR) 2174 xml_failure(g_parser); 2175 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2176 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2177 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2178 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2179 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2180 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2181 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2182 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2183 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2184 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2185 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2186 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2187 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2188 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2189 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2190 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2191 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2192 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); 2193 assert_record_handler_called(&storage, 18, "record_default_handler", 1); 2194 assert_record_handler_called(&storage, 19, "record_default_handler", 6); 2195 assert_true(storage.count == 20); 2196 } 2197 2198 /* Finally, without passing the cdata to the default handler */ 2199 set_subtest("not passing cdata"); 2200 { 2201 struct handler_record_list storage; 2202 storage.count = 0; 2203 XML_ParserReset(g_parser, NULL); 2204 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2205 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2206 XML_SetUserData(g_parser, &storage); 2207 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2208 XML_TRUE) 2209 == XML_STATUS_ERROR) 2210 xml_failure(g_parser); 2211 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2212 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2213 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2214 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2215 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2216 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2217 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2218 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2219 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2220 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2221 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2222 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2223 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2224 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2225 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2226 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2227 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2228 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", 2229 1); 2230 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2231 assert_true(storage.count == 19); 2232 } 2233 } 2234 END_TEST 2235 2236 /* Test DTD element parsing code paths */ 2237 START_TEST(test_dtd_elements) { 2238 const char *text = "<!DOCTYPE doc [\n" 2239 "<!ELEMENT doc (chapter)>\n" 2240 "<!ELEMENT chapter (#PCDATA)>\n" 2241 "]>\n" 2242 "<doc><chapter>Wombats are go</chapter></doc>"; 2243 2244 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 2245 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2246 == XML_STATUS_ERROR) 2247 xml_failure(g_parser); 2248 } 2249 END_TEST 2250 2251 static void XMLCALL 2252 element_decl_check_model(void *userData, const XML_Char *name, 2253 XML_Content *model) { 2254 UNUSED_P(userData); 2255 uint32_t errorFlags = 0; 2256 2257 /* Expected model array structure is this: 2258 * [0] (type 6, quant 0) 2259 * [1] (type 5, quant 0) 2260 * [3] (type 4, quant 0, name "bar") 2261 * [4] (type 4, quant 0, name "foo") 2262 * [5] (type 4, quant 3, name "xyz") 2263 * [2] (type 4, quant 2, name "zebra") 2264 */ 2265 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); 2266 errorFlags |= ((model != NULL) ? 0 : (1u << 1)); 2267 2268 if (model != NULL) { 2269 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); 2270 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); 2271 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); 2272 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); 2273 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); 2274 2275 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); 2276 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); 2277 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); 2278 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); 2279 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); 2280 2281 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); 2282 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); 2283 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14)); 2284 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); 2285 errorFlags 2286 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); 2287 2288 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); 2289 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); 2290 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); 2291 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); 2292 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); 2293 2294 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); 2295 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); 2296 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); 2297 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); 2298 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); 2299 2300 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); 2301 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); 2302 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); 2303 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); 2304 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31)); 2305 } 2306 2307 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); 2308 XML_FreeContentModel(g_parser, model); 2309 } 2310 2311 START_TEST(test_dtd_elements_nesting) { 2312 // Payload inspired by a test in Perl's XML::Parser 2313 const char *text = "<!DOCTYPE foo [\n" 2314 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n" 2315 "]>\n" 2316 "<foo/>"; 2317 2318 XML_SetUserData(g_parser, (void *)(uintptr_t)-1); 2319 2320 XML_SetElementDeclHandler(g_parser, element_decl_check_model); 2321 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2322 == XML_STATUS_ERROR) 2323 xml_failure(g_parser); 2324 2325 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) 2326 fail("Element declaration model regression detected"); 2327 } 2328 END_TEST 2329 2330 /* Test foreign DTD handling */ 2331 START_TEST(test_set_foreign_dtd) { 2332 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"; 2333 const char *text2 = "<doc>&entity;</doc>"; 2334 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2335 2336 /* Check hash salt is passed through too */ 2337 XML_SetHashSalt(g_parser, 0x12345678); 2338 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2339 XML_SetUserData(g_parser, &test_data); 2340 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2341 /* Add a default handler to exercise more code paths */ 2342 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2343 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2344 fail("Could not set foreign DTD"); 2345 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2346 == XML_STATUS_ERROR) 2347 xml_failure(g_parser); 2348 2349 /* Ensure that trying to set the DTD after parsing has started 2350 * is faulted, even if it's the same setting. 2351 */ 2352 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2353 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2354 fail("Failed to reject late foreign DTD setting"); 2355 /* Ditto for the hash salt */ 2356 if (XML_SetHashSalt(g_parser, 0x23456789)) 2357 fail("Failed to reject late hash salt change"); 2358 2359 /* Now finish the parse */ 2360 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2361 == XML_STATUS_ERROR) 2362 xml_failure(g_parser); 2363 } 2364 END_TEST 2365 2366 /* Test foreign DTD handling with a failing NotStandalone handler */ 2367 START_TEST(test_foreign_dtd_not_standalone) { 2368 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2369 "<doc>&entity;</doc>"; 2370 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2371 2372 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2373 XML_SetUserData(g_parser, &test_data); 2374 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2375 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 2376 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2377 fail("Could not set foreign DTD"); 2378 expect_failure(text, XML_ERROR_NOT_STANDALONE, 2379 "NotStandalonehandler failed to reject"); 2380 } 2381 END_TEST 2382 2383 /* Test invalid character in a foreign DTD is faulted */ 2384 START_TEST(test_invalid_foreign_dtd) { 2385 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2386 "<doc>&entity;</doc>"; 2387 ExtFaults test_data 2388 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN}; 2389 2390 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2391 XML_SetUserData(g_parser, &test_data); 2392 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 2393 XML_UseForeignDTD(g_parser, XML_TRUE); 2394 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2395 "Bad DTD should not have been accepted"); 2396 } 2397 END_TEST 2398 2399 /* Test foreign DTD use with a doctype */ 2400 START_TEST(test_foreign_dtd_with_doctype) { 2401 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n" 2402 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n"; 2403 const char *text2 = "<doc>&entity;</doc>"; 2404 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2405 2406 /* Check hash salt is passed through too */ 2407 XML_SetHashSalt(g_parser, 0x12345678); 2408 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2409 XML_SetUserData(g_parser, &test_data); 2410 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2411 /* Add a default handler to exercise more code paths */ 2412 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2413 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2414 fail("Could not set foreign DTD"); 2415 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2416 == XML_STATUS_ERROR) 2417 xml_failure(g_parser); 2418 2419 /* Ensure that trying to set the DTD after parsing has started 2420 * is faulted, even if it's the same setting. 2421 */ 2422 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2423 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2424 fail("Failed to reject late foreign DTD setting"); 2425 /* Ditto for the hash salt */ 2426 if (XML_SetHashSalt(g_parser, 0x23456789)) 2427 fail("Failed to reject late hash salt change"); 2428 2429 /* Now finish the parse */ 2430 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2431 == XML_STATUS_ERROR) 2432 xml_failure(g_parser); 2433 } 2434 END_TEST 2435 2436 /* Test XML_UseForeignDTD with no external subset present */ 2437 START_TEST(test_foreign_dtd_without_external_subset) { 2438 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n" 2439 "<doc>&foo;</doc>"; 2440 2441 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2442 XML_SetUserData(g_parser, NULL); 2443 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2444 XML_UseForeignDTD(g_parser, XML_TRUE); 2445 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2446 == XML_STATUS_ERROR) 2447 xml_failure(g_parser); 2448 } 2449 END_TEST 2450 2451 START_TEST(test_empty_foreign_dtd) { 2452 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2453 "<doc>&entity;</doc>"; 2454 2455 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2456 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2457 XML_UseForeignDTD(g_parser, XML_TRUE); 2458 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 2459 "Undefined entity not faulted"); 2460 } 2461 END_TEST 2462 2463 /* Test XML Base is set and unset appropriately */ 2464 START_TEST(test_set_base) { 2465 const XML_Char *old_base; 2466 const XML_Char *new_base = XCS("/local/file/name.xml"); 2467 2468 old_base = XML_GetBase(g_parser); 2469 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK) 2470 fail("Unable to set base"); 2471 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0) 2472 fail("Base setting not correct"); 2473 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK) 2474 fail("Unable to NULL base"); 2475 if (XML_GetBase(g_parser) != NULL) 2476 fail("Base setting not nulled"); 2477 XML_SetBase(g_parser, old_base); 2478 } 2479 END_TEST 2480 2481 /* Test attribute counts, indexing, etc */ 2482 START_TEST(test_attributes) { 2483 const char *text = "<!DOCTYPE doc [\n" 2484 "<!ELEMENT doc (tag)>\n" 2485 "<!ATTLIST doc id ID #REQUIRED>\n" 2486 "]>" 2487 "<doc a='1' id='one' b='2'>" 2488 "<tag c='3'/>" 2489 "</doc>"; 2490 AttrInfo doc_info[] = {{XCS("a"), XCS("1")}, 2491 {XCS("b"), XCS("2")}, 2492 {XCS("id"), XCS("one")}, 2493 {NULL, NULL}}; 2494 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}}; 2495 ElementInfo info[] = {{XCS("doc"), 3, 0, XCS("id"), doc_info}, 2496 {XCS("tag"), 1, 0, NULL, tag_info}, 2497 {NULL, 0, 0, NULL, NULL}}; 2498 2499 XML_Parser parser = XML_ParserCreate(NULL); 2500 assert_true(parser != NULL); 2501 ParserAndElementInfo parserAndElementInfos = { 2502 parser, 2503 info, 2504 }; 2505 2506 XML_SetStartElementHandler(parser, counting_start_element_handler); 2507 XML_SetUserData(parser, &parserAndElementInfos); 2508 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2509 == XML_STATUS_ERROR) 2510 xml_failure(parser); 2511 2512 XML_ParserFree(parser); 2513 } 2514 END_TEST 2515 2516 START_TEST(test_duplicate_cdata_attribute) { 2517 /* 2518 https://www.w3.org/TR/xml/#attdecls 2519 2520 Test the following statement from the linked specification: 2521 When more than one definition is provided for the same attribute of a given 2522 element type, the first declaration is binding and later declarations are 2523 ignored. 2524 */ 2525 2526 const char *text 2527 = "<!DOCTYPE doc [\n" 2528 " <!ATTLIST doc attribute CDATA 'expected' attribute CDATA 'ignored'>\n" 2529 "]>\n" 2530 "<doc/>\n"; 2531 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}}; 2532 ElementInfo info[] 2533 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}}; 2534 2535 XML_Parser parser = XML_ParserCreate(NULL); 2536 assert_true(parser != NULL); 2537 2538 ParserAndElementInfo parserAndElementInfos = { 2539 parser, 2540 info, 2541 }; 2542 2543 XML_SetStartElementHandler(parser, counting_start_element_handler); 2544 XML_SetUserData(parser, &parserAndElementInfos); 2545 2546 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2547 != XML_STATUS_OK) 2548 xml_failure(parser); 2549 2550 XML_ParserFree(parser); 2551 } 2552 END_TEST 2553 2554 START_TEST(test_duplicate_id_attribute_1) { 2555 /* 2556 https://www.w3.org/TR/xml/#attdecls 2557 2558 Test the following statement from the linked specification: 2559 When more than one definition is provided for the same attribute of a given 2560 element type, the first declaration is binding and later declarations are 2561 ignored. 2562 */ 2563 2564 const char *text 2565 = "<!DOCTYPE doc [\n" 2566 " <!ATTLIST doc identifier CDATA 'expected' identifier ID #REQUIRED>\n" 2567 "]>\n" 2568 "<doc/>\n"; 2569 AttrInfo doc_info[] = {{XCS("identifier"), XCS("expected")}, {NULL, NULL}}; 2570 ElementInfo info[] 2571 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}}; 2572 2573 XML_Parser parser = XML_ParserCreate(NULL); 2574 assert_true(parser != NULL); 2575 2576 ParserAndElementInfo parserAndElementInfos = { 2577 parser, 2578 info, 2579 }; 2580 2581 XML_SetStartElementHandler(parser, counting_start_element_handler); 2582 XML_SetUserData(parser, &parserAndElementInfos); 2583 2584 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2585 != XML_STATUS_OK) 2586 xml_failure(parser); 2587 2588 XML_ParserFree(parser); 2589 } 2590 END_TEST 2591 2592 START_TEST(test_duplicate_id_attribute_2) { 2593 /* 2594 https://www.w3.org/TR/xml/#attdecls 2595 2596 Test the following statement from the linked specification: 2597 When more than one definition is provided for the same attribute of a given 2598 element type, the first declaration is binding and later declarations are 2599 ignored. 2600 */ 2601 2602 const char *text 2603 = "<!DOCTYPE doc [\n" 2604 " <!ATTLIST doc identifier ID #REQUIRED identifier CDATA 'unexpected'>\n" 2605 "]>\n" 2606 "<doc/>\n"; 2607 AttrInfo doc_info[] = {{NULL, NULL}}; 2608 2609 ElementInfo info[] 2610 = {{XCS("doc"), 0, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}}; 2611 2612 XML_Parser parser = XML_ParserCreate(NULL); 2613 assert_true(parser != NULL); 2614 2615 ParserAndElementInfo parserAndElementInfos = { 2616 parser, 2617 info, 2618 }; 2619 2620 XML_SetStartElementHandler(parser, counting_start_element_handler); 2621 XML_SetUserData(parser, &parserAndElementInfos); 2622 2623 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2624 != XML_STATUS_OK) 2625 xml_failure(parser); 2626 2627 XML_ParserFree(parser); 2628 } 2629 END_TEST 2630 2631 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl) { 2632 /* 2633 https://www.w3.org/TR/xml/#attdecls 2634 2635 Test the following statement from the linked specification: 2636 When more than one AttlistDecl is provided for a given element type, 2637 the contents of all those provided are merged. 2638 */ 2639 const char *text = "<!DOCTYPE doc [\n" 2640 " <!ATTLIST doc attribute CDATA 'expected'>\n" 2641 " <!ATTLIST doc attribute CDATA 'ignored'>\n" 2642 "]>\n" 2643 "<doc/>\n"; 2644 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}}; 2645 ElementInfo info[] 2646 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}}; 2647 2648 XML_Parser parser = XML_ParserCreate(NULL); 2649 assert_true(parser != NULL); 2650 2651 ParserAndElementInfo parserAndElementInfos = { 2652 parser, 2653 info, 2654 }; 2655 2656 XML_SetStartElementHandler(parser, counting_start_element_handler); 2657 XML_SetUserData(parser, &parserAndElementInfos); 2658 2659 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2660 != XML_STATUS_OK) 2661 xml_failure(parser); 2662 2663 XML_ParserFree(parser); 2664 } 2665 END_TEST 2666 2667 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2) { 2668 /* 2669 https://www.w3.org/TR/xml/#attdecls 2670 2671 Test the following statement from the linked specification: 2672 When more than one AttlistDecl is provided for a given element type, 2673 the contents of all those provided are merged. 2674 */ 2675 const char *text = "<!DOCTYPE doc [\n" 2676 " <!ATTLIST doc attribute CDATA 'expected_doc'>\n" 2677 " <!ATTLIST tag attribute CDATA 'expected_tag'>\n" 2678 " <!ATTLIST doc attribute CDATA 'ignored_doc'>\n" 2679 "]>\n" 2680 "<doc><tag></tag></doc>\n"; 2681 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")}, {NULL, NULL}}; 2682 AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}}; 2683 ElementInfo info[] = {{XCS("doc"), 0, 1, NULL, doc_info}, 2684 {XCS("tag"), 0, 1, NULL, tag_info}, 2685 {NULL, 0, 0, NULL, NULL}}; 2686 2687 XML_Parser parser = XML_ParserCreate(NULL); 2688 assert_true(parser != NULL); 2689 2690 ParserAndElementInfo parserAndElementInfos = { 2691 parser, 2692 info, 2693 }; 2694 2695 XML_SetStartElementHandler(parser, counting_start_element_handler); 2696 XML_SetUserData(parser, &parserAndElementInfos); 2697 2698 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2699 != XML_STATUS_OK) 2700 xml_failure(parser); 2701 2702 XML_ParserFree(parser); 2703 } 2704 END_TEST 2705 2706 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3) { 2707 /* 2708 https://www.w3.org/TR/xml/#attdecls 2709 2710 Test the following statement from the linked specification: 2711 When more than one AttlistDecl is provided for a given element type, 2712 the contents of all those provided are merged. 2713 */ 2714 const char *text 2715 = "<!DOCTYPE doc [\n" 2716 " <!ATTLIST doc attribute CDATA 'expected_doc'>\n" 2717 " <!ATTLIST tag attribute CDATA 'expected_tag'>\n" 2718 " <!ATTLIST doc second_attribute CDATA 'second_expected_doc' attribute CDATA 'ignored_doc'>\n" 2719 "]>\n" 2720 "<doc><tag></tag></doc>\n"; 2721 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")}, 2722 {XCS("second_attribute"), XCS("second_expected_doc")}, 2723 {NULL, NULL}}; 2724 AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}}; 2725 ElementInfo info[] = {{XCS("doc"), 0, 2, NULL, doc_info}, 2726 {XCS("tag"), 0, 1, NULL, tag_info}, 2727 {NULL, 0, 0, NULL, NULL}}; 2728 2729 XML_Parser parser = XML_ParserCreate(NULL); 2730 assert_true(parser != NULL); 2731 2732 ParserAndElementInfo parserAndElementInfos = { 2733 parser, 2734 info, 2735 }; 2736 2737 XML_SetStartElementHandler(parser, counting_start_element_handler); 2738 XML_SetUserData(parser, &parserAndElementInfos); 2739 2740 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2741 != XML_STATUS_OK) 2742 xml_failure(parser); 2743 2744 XML_ParserFree(parser); 2745 } 2746 END_TEST 2747 2748 START_TEST(test_duplicate_id_attribute_multiple_attlistdecl) { 2749 /* 2750 https://www.w3.org/TR/xml/#attdecls 2751 2752 Test the following statement from the linked specification: 2753 When more than one AttlistDecl is provided for a given element type, 2754 the contents of all those provided are merged. 2755 */ 2756 const char *text = "<!DOCTYPE doc [\n" 2757 " <!ATTLIST doc identifier ID #REQUIRED>\n" 2758 " <!ATTLIST tag identifier CDATA 'identifier_tag'>\n" 2759 " <!ATTLIST doc identifier CDATA 'ignored'>\n" 2760 "]>\n" 2761 "<doc identifier='doc_identity'><tag></tag></doc>\n"; 2762 AttrInfo doc_info[] 2763 = {{XCS("identifier"), XCS("doc_identity")}, {NULL, NULL}}; 2764 AttrInfo tag_info[] 2765 = {{XCS("identifier"), XCS("identifier_tag")}, {NULL, NULL}}; 2766 ElementInfo info[] = {{XCS("doc"), 1, 0, XCS("identifier"), doc_info}, 2767 {XCS("tag"), 0, 1, NULL, tag_info}, 2768 {NULL, 0, 0, NULL, NULL}}; 2769 2770 XML_Parser parser = XML_ParserCreate(NULL); 2771 assert_true(parser != NULL); 2772 2773 ParserAndElementInfo parserAndElementInfos = { 2774 parser, 2775 info, 2776 }; 2777 2778 XML_SetStartElementHandler(parser, counting_start_element_handler); 2779 XML_SetUserData(parser, &parserAndElementInfos); 2780 2781 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2782 != XML_STATUS_OK) 2783 xml_failure(parser); 2784 2785 XML_ParserFree(parser); 2786 } 2787 END_TEST 2788 2789 /* Test reset works correctly in the middle of processing an internal 2790 * entity. Exercises some obscure code in XML_ParserReset(). 2791 */ 2792 START_TEST(test_reset_in_entity) { 2793 if (g_chunkSize != 0) { 2794 // this test does not use SINGLE_BYTES, because of suspension 2795 return; 2796 } 2797 2798 const char *text = "<!DOCTYPE doc [\n" 2799 "<!ENTITY wombat 'wom'>\n" 2800 "<!ENTITY entity 'hi &wom; there'>\n" 2801 "]>\n" 2802 "<doc>&entity;</doc>"; 2803 XML_ParsingStatus status; 2804 2805 g_resumable = XML_TRUE; 2806 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2807 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 2808 // we won't know exactly how much input we actually managed to give Expat. 2809 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2810 == XML_STATUS_ERROR) 2811 xml_failure(g_parser); 2812 XML_GetParsingStatus(g_parser, &status); 2813 if (status.parsing != XML_SUSPENDED) 2814 fail("Parsing status not SUSPENDED"); 2815 XML_ParserReset(g_parser, NULL); 2816 XML_GetParsingStatus(g_parser, &status); 2817 if (status.parsing != XML_INITIALIZED) 2818 fail("Parsing status doesn't reset to INITIALIZED"); 2819 } 2820 END_TEST 2821 2822 /* Test that resume correctly passes through parse errors */ 2823 START_TEST(test_resume_invalid_parse) { 2824 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */ 2825 2826 g_resumable = XML_TRUE; 2827 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2828 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2829 == XML_STATUS_ERROR) 2830 xml_failure(g_parser); 2831 if (XML_ResumeParser(g_parser) == XML_STATUS_OK) 2832 fail("Resumed invalid parse not faulted"); 2833 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN) 2834 fail("Invalid parse not correctly faulted"); 2835 } 2836 END_TEST 2837 2838 /* Test that re-suspended parses are correctly passed through */ 2839 START_TEST(test_resume_resuspended) { 2840 const char *text = "<doc>Hello<meep/>world</doc>"; 2841 2842 g_resumable = XML_TRUE; 2843 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2844 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2845 == XML_STATUS_ERROR) 2846 xml_failure(g_parser); 2847 g_resumable = XML_TRUE; 2848 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2849 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 2850 fail("Resumption not suspended"); 2851 /* This one should succeed and finish up */ 2852 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 2853 xml_failure(g_parser); 2854 } 2855 END_TEST 2856 2857 /* Test that CDATA shows up correctly through a default handler */ 2858 START_TEST(test_cdata_default) { 2859 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>"; 2860 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>"); 2861 CharData storage; 2862 2863 CharData_Init(&storage); 2864 XML_SetUserData(g_parser, &storage); 2865 XML_SetDefaultHandler(g_parser, accumulate_characters); 2866 2867 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2868 == XML_STATUS_ERROR) 2869 xml_failure(g_parser); 2870 CharData_CheckXMLChars(&storage, expected); 2871 } 2872 END_TEST 2873 2874 /* Test resetting a subordinate parser does exactly nothing */ 2875 START_TEST(test_subordinate_reset) { 2876 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2877 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2878 "<doc>&entity;</doc>"; 2879 2880 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2881 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter); 2882 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2883 == XML_STATUS_ERROR) 2884 xml_failure(g_parser); 2885 } 2886 END_TEST 2887 2888 /* Test suspending a subordinate parser */ 2889 START_TEST(test_subordinate_suspend) { 2890 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2891 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2892 "<doc>&entity;</doc>"; 2893 2894 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2895 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender); 2896 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2897 == XML_STATUS_ERROR) 2898 xml_failure(g_parser); 2899 } 2900 END_TEST 2901 2902 /* Test suspending a subordinate parser from an XML declaration */ 2903 /* Increases code coverage of the tests */ 2904 2905 START_TEST(test_subordinate_xdecl_suspend) { 2906 const char *text 2907 = "<!DOCTYPE doc [\n" 2908 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2909 "]>\n" 2910 "<doc>&entity;</doc>"; 2911 2912 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2913 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2914 g_resumable = XML_TRUE; 2915 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2916 == XML_STATUS_ERROR) 2917 xml_failure(g_parser); 2918 } 2919 END_TEST 2920 2921 START_TEST(test_subordinate_xdecl_abort) { 2922 const char *text 2923 = "<!DOCTYPE doc [\n" 2924 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2925 "]>\n" 2926 "<doc>&entity;</doc>"; 2927 2928 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2929 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2930 g_resumable = XML_FALSE; 2931 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2932 == XML_STATUS_ERROR) 2933 xml_failure(g_parser); 2934 } 2935 END_TEST 2936 2937 /* Test external entity fault handling with suspension */ 2938 START_TEST(test_ext_entity_invalid_suspended_parse) { 2939 const char *text = "<!DOCTYPE doc [\n" 2940 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2941 "]>\n" 2942 "<doc>&en;</doc>"; 2943 ExtFaults faults[] 2944 = {{"<?xml version='1.0' encoding='us-ascii'?><", 2945 "Incomplete element declaration not faulted", NULL, 2946 XML_ERROR_UNCLOSED_TOKEN}, 2947 {/* First two bytes of a three-byte char */ 2948 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82", 2949 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 2950 {NULL, NULL, NULL, XML_ERROR_NONE}}; 2951 ExtFaults *fault; 2952 2953 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 2954 set_subtest("%s", fault->parse_text); 2955 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2956 XML_SetExternalEntityRefHandler(g_parser, 2957 external_entity_suspending_faulter); 2958 XML_SetUserData(g_parser, fault); 2959 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2960 "Parser did not report external entity error"); 2961 XML_ParserReset(g_parser, NULL); 2962 } 2963 } 2964 END_TEST 2965 2966 /* Test setting an explicit encoding */ 2967 START_TEST(test_explicit_encoding) { 2968 const char *text1 = "<doc>Hello "; 2969 const char *text2 = " World</doc>"; 2970 2971 /* Just check that we can set the encoding to NULL before starting */ 2972 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2973 fail("Failed to initialise encoding to NULL"); 2974 /* Say we are UTF-8 */ 2975 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK) 2976 fail("Failed to set explicit encoding"); 2977 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2978 == XML_STATUS_ERROR) 2979 xml_failure(g_parser); 2980 /* Try to switch encodings mid-parse */ 2981 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR) 2982 fail("Allowed encoding change"); 2983 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2984 == XML_STATUS_ERROR) 2985 xml_failure(g_parser); 2986 /* Try now the parse is over */ 2987 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2988 fail("Failed to unset encoding"); 2989 } 2990 END_TEST 2991 2992 /* Test handling of trailing CR (rather than newline) */ 2993 START_TEST(test_trailing_cr) { 2994 const char *text = "<doc>\r"; 2995 int found_cr; 2996 2997 /* Try with a character handler, for code coverage */ 2998 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler); 2999 XML_SetUserData(g_parser, &found_cr); 3000 found_cr = 0; 3001 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3002 == XML_STATUS_OK) 3003 fail("Failed to fault unclosed doc"); 3004 if (found_cr == 0) 3005 fail("Did not catch the carriage return"); 3006 XML_ParserReset(g_parser, NULL); 3007 3008 /* Now with a default handler instead */ 3009 XML_SetDefaultHandler(g_parser, cr_cdata_handler); 3010 XML_SetUserData(g_parser, &found_cr); 3011 found_cr = 0; 3012 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3013 == XML_STATUS_OK) 3014 fail("Failed to fault unclosed doc"); 3015 if (found_cr == 0) 3016 fail("Did not catch default carriage return"); 3017 } 3018 END_TEST 3019 3020 /* Test trailing CR in an external entity parse */ 3021 START_TEST(test_ext_entity_trailing_cr) { 3022 const char *text = "<!DOCTYPE doc [\n" 3023 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 3024 "]>\n" 3025 "<doc>&en;</doc>"; 3026 int found_cr; 3027 3028 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3029 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher); 3030 XML_SetUserData(g_parser, &found_cr); 3031 found_cr = 0; 3032 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3033 != XML_STATUS_OK) 3034 xml_failure(g_parser); 3035 if (found_cr == 0) 3036 fail("No carriage return found"); 3037 XML_ParserReset(g_parser, NULL); 3038 3039 /* Try again with a different trailing CR */ 3040 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3041 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher); 3042 XML_SetUserData(g_parser, &found_cr); 3043 found_cr = 0; 3044 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3045 != XML_STATUS_OK) 3046 xml_failure(g_parser); 3047 if (found_cr == 0) 3048 fail("No carriage return found"); 3049 } 3050 END_TEST 3051 3052 /* Test handling of trailing square bracket */ 3053 START_TEST(test_trailing_rsqb) { 3054 const char *text8 = "<doc>]"; 3055 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; 3056 int found_rsqb; 3057 int text8_len = (int)strlen(text8); 3058 3059 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 3060 XML_SetUserData(g_parser, &found_rsqb); 3061 found_rsqb = 0; 3062 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE) 3063 == XML_STATUS_OK) 3064 fail("Failed to fault unclosed doc"); 3065 if (found_rsqb == 0) 3066 fail("Did not catch the right square bracket"); 3067 3068 /* Try again with a different encoding */ 3069 XML_ParserReset(g_parser, NULL); 3070 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 3071 XML_SetUserData(g_parser, &found_rsqb); 3072 found_rsqb = 0; 3073 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 3074 XML_TRUE) 3075 == XML_STATUS_OK) 3076 fail("Failed to fault unclosed doc"); 3077 if (found_rsqb == 0) 3078 fail("Did not catch the right square bracket"); 3079 3080 /* And finally with a default handler */ 3081 XML_ParserReset(g_parser, NULL); 3082 XML_SetDefaultHandler(g_parser, rsqb_handler); 3083 XML_SetUserData(g_parser, &found_rsqb); 3084 found_rsqb = 0; 3085 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 3086 XML_TRUE) 3087 == XML_STATUS_OK) 3088 fail("Failed to fault unclosed doc"); 3089 if (found_rsqb == 0) 3090 fail("Did not catch the right square bracket"); 3091 } 3092 END_TEST 3093 3094 /* Test trailing right square bracket in an external entity parse */ 3095 START_TEST(test_ext_entity_trailing_rsqb) { 3096 const char *text = "<!DOCTYPE doc [\n" 3097 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 3098 "]>\n" 3099 "<doc>&en;</doc>"; 3100 int found_rsqb; 3101 3102 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3103 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher); 3104 XML_SetUserData(g_parser, &found_rsqb); 3105 found_rsqb = 0; 3106 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3107 != XML_STATUS_OK) 3108 xml_failure(g_parser); 3109 if (found_rsqb == 0) 3110 fail("No right square bracket found"); 3111 } 3112 END_TEST 3113 3114 /* Test CDATA handling in an external entity */ 3115 START_TEST(test_ext_entity_good_cdata) { 3116 const char *text = "<!DOCTYPE doc [\n" 3117 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 3118 "]>\n" 3119 "<doc>&en;</doc>"; 3120 3121 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3122 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii); 3123 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3124 != XML_STATUS_OK) 3125 xml_failure(g_parser); 3126 } 3127 END_TEST 3128 3129 /* Test user parameter settings */ 3130 START_TEST(test_user_parameters) { 3131 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 3132 "<!-- Primary parse -->\n" 3133 "<!DOCTYPE doc SYSTEM 'foo'>\n" 3134 "<doc>&entity;"; 3135 const char *epilog = "<!-- Back to primary parser -->\n" 3136 "</doc>"; 3137 3138 g_comment_count = 0; 3139 g_skip_count = 0; 3140 g_xdecl_count = 0; 3141 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3142 XML_SetXmlDeclHandler(g_parser, xml_decl_handler); 3143 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker); 3144 XML_SetCommentHandler(g_parser, data_check_comment_handler); 3145 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler); 3146 XML_UseParserAsHandlerArg(g_parser); 3147 XML_SetUserData(g_parser, (void *)1); 3148 g_handler_data = g_parser; 3149 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3150 == XML_STATUS_ERROR) 3151 xml_failure(g_parser); 3152 /* Ensure we can't change policy mid-parse */ 3153 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) 3154 fail("Changed param entity parsing policy while parsing"); 3155 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) 3156 == XML_STATUS_ERROR) 3157 xml_failure(g_parser); 3158 if (g_comment_count != 3) 3159 fail("Comment handler not invoked enough times"); 3160 if (g_skip_count != 1) 3161 fail("Skip handler not invoked enough times"); 3162 if (g_xdecl_count != 1) 3163 fail("XML declaration handler not invoked"); 3164 } 3165 END_TEST 3166 3167 /* Test that an explicit external entity handler argument replaces 3168 * the parser as the first argument. 3169 * 3170 * We do not call the first parameter to the external entity handler 3171 * 'parser' for once, since the first time the handler is called it 3172 * will actually be a text string. We need to be able to access the 3173 * global 'parser' variable to create our external entity parser from, 3174 * since there are code paths we need to ensure get executed. 3175 */ 3176 START_TEST(test_ext_entity_ref_parameter) { 3177 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 3178 "<!DOCTYPE doc SYSTEM 'foo'>\n" 3179 "<doc>&entity;</doc>"; 3180 3181 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3182 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 3183 /* Set a handler arg that is not NULL and not parser (which is 3184 * what NULL would cause to be passed. 3185 */ 3186 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text); 3187 g_handler_data = text; 3188 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3189 == XML_STATUS_ERROR) 3190 xml_failure(g_parser); 3191 3192 /* Now try again with unset args */ 3193 XML_ParserReset(g_parser, NULL); 3194 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3195 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 3196 XML_SetExternalEntityRefHandlerArg(g_parser, NULL); 3197 g_handler_data = g_parser; 3198 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3199 == XML_STATUS_ERROR) 3200 xml_failure(g_parser); 3201 } 3202 END_TEST 3203 3204 /* Test the parsing of an empty string */ 3205 START_TEST(test_empty_parse) { 3206 const char *text = "<doc></doc>"; 3207 const char *partial = "<doc>"; 3208 3209 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) 3210 fail("Parsing empty string faulted"); 3211 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 3212 fail("Parsing final empty string not faulted"); 3213 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS) 3214 fail("Parsing final empty string faulted for wrong reason"); 3215 3216 /* Now try with valid text before the empty end */ 3217 XML_ParserReset(g_parser, NULL); 3218 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3219 == XML_STATUS_ERROR) 3220 xml_failure(g_parser); 3221 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) 3222 fail("Parsing final empty string faulted"); 3223 3224 /* Now try with invalid text before the empty end */ 3225 XML_ParserReset(g_parser, NULL); 3226 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial), 3227 XML_FALSE) 3228 == XML_STATUS_ERROR) 3229 xml_failure(g_parser); 3230 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 3231 fail("Parsing final incomplete empty string not faulted"); 3232 } 3233 END_TEST 3234 3235 /* Test XML_Parse for len < 0 */ 3236 START_TEST(test_negative_len_parse) { 3237 const char *const doc = "<root/>"; 3238 for (int isFinal = 0; isFinal < 2; isFinal++) { 3239 set_subtest("isFinal=%d", isFinal); 3240 3241 XML_Parser parser = XML_ParserCreate(NULL); 3242 3243 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 3244 fail("There was not supposed to be any initial parse error."); 3245 3246 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal); 3247 3248 if (status != XML_STATUS_ERROR) 3249 fail("Negative len was expected to fail the parse but did not."); 3250 3251 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 3252 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 3253 3254 XML_ParserFree(parser); 3255 } 3256 } 3257 END_TEST 3258 3259 /* Test XML_ParseBuffer for len < 0 */ 3260 START_TEST(test_negative_len_parse_buffer) { 3261 const char *const doc = "<root/>"; 3262 for (int isFinal = 0; isFinal < 2; isFinal++) { 3263 set_subtest("isFinal=%d", isFinal); 3264 3265 XML_Parser parser = XML_ParserCreate(NULL); 3266 3267 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 3268 fail("There was not supposed to be any initial parse error."); 3269 3270 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc)); 3271 3272 if (buffer == NULL) 3273 fail("XML_GetBuffer failed."); 3274 3275 memcpy(buffer, doc, strlen(doc)); 3276 3277 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal); 3278 3279 if (status != XML_STATUS_ERROR) 3280 fail("Negative len was expected to fail the parse but did not."); 3281 3282 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 3283 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 3284 3285 XML_ParserFree(parser); 3286 } 3287 } 3288 END_TEST 3289 3290 /* Test odd corners of the XML_GetBuffer interface */ 3291 static enum XML_Status 3292 get_feature(enum XML_FeatureEnum feature_id, long *presult) { 3293 const XML_Feature *feature = XML_GetFeatureList(); 3294 3295 if (feature == NULL) 3296 return XML_STATUS_ERROR; 3297 for (; feature->feature != XML_FEATURE_END; feature++) { 3298 if (feature->feature == feature_id) { 3299 *presult = feature->value; 3300 return XML_STATUS_OK; 3301 } 3302 } 3303 return XML_STATUS_ERROR; 3304 } 3305 3306 /* Test odd corners of the XML_GetBuffer interface */ 3307 START_TEST(test_get_buffer_1) { 3308 const char *text = get_buffer_test_text; 3309 void *buffer; 3310 long context_bytes; 3311 3312 /* Attempt to allocate a negative length buffer */ 3313 if (XML_GetBuffer(g_parser, -12) != NULL) 3314 fail("Negative length buffer not failed"); 3315 3316 /* Now get a small buffer and extend it past valid length */ 3317 buffer = XML_GetBuffer(g_parser, 1536); 3318 if (buffer == NULL) 3319 fail("1.5K buffer failed"); 3320 assert(buffer != NULL); 3321 memcpy(buffer, text, strlen(text)); 3322 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 3323 == XML_STATUS_ERROR) 3324 xml_failure(g_parser); 3325 if (XML_GetBuffer(g_parser, INT_MAX) != NULL) 3326 fail("INT_MAX buffer not failed"); 3327 3328 /* Now try extending it a more reasonable but still too large 3329 * amount. The allocator in XML_GetBuffer() doubles the buffer 3330 * size until it exceeds the requested amount or INT_MAX. If it 3331 * exceeds INT_MAX, it rejects the request, so we want a request 3332 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable, 3333 * with an extra byte just to ensure that the request is off any 3334 * boundary. The request will be inflated internally by 3335 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our 3336 * request. 3337 */ 3338 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK) 3339 context_bytes = 0; 3340 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL) 3341 fail("INT_MAX- buffer not failed"); 3342 3343 /* Now try extending it a carefully crafted amount */ 3344 if (XML_GetBuffer(g_parser, 1000) == NULL) 3345 fail("1000 buffer failed"); 3346 } 3347 END_TEST 3348 3349 /* Test more corners of the XML_GetBuffer interface */ 3350 START_TEST(test_get_buffer_2) { 3351 const char *text = get_buffer_test_text; 3352 void *buffer; 3353 3354 /* Now get a decent buffer */ 3355 buffer = XML_GetBuffer(g_parser, 1536); 3356 if (buffer == NULL) 3357 fail("1.5K buffer failed"); 3358 assert(buffer != NULL); 3359 memcpy(buffer, text, strlen(text)); 3360 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 3361 == XML_STATUS_ERROR) 3362 xml_failure(g_parser); 3363 3364 /* Extend it, to catch a different code path */ 3365 if (XML_GetBuffer(g_parser, 1024) == NULL) 3366 fail("1024 buffer failed"); 3367 } 3368 END_TEST 3369 3370 /* Test for signed integer overflow CVE-2022-23852 */ 3371 #if XML_CONTEXT_BYTES > 0 3372 START_TEST(test_get_buffer_3_overflow) { 3373 XML_Parser parser = XML_ParserCreate(NULL); 3374 assert(parser != NULL); 3375 3376 const char *const text = "\n"; 3377 const int expectedKeepValue = (int)strlen(text); 3378 3379 // After this call, variable "keep" in XML_GetBuffer will 3380 // have value expectedKeepValue 3381 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 3382 XML_FALSE /* isFinal */) 3383 == XML_STATUS_ERROR) 3384 xml_failure(parser); 3385 3386 assert(expectedKeepValue > 0); 3387 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) 3388 fail("enlarging buffer not failed"); 3389 3390 XML_ParserFree(parser); 3391 } 3392 END_TEST 3393 #endif // XML_CONTEXT_BYTES > 0 3394 3395 START_TEST(test_buffer_can_grow_to_max) { 3396 const char *const prefixes[] = { 3397 "", 3398 "<", 3399 "<x a='", 3400 "<doc><x a='", 3401 "<document><x a='", 3402 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" 3403 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" 3404 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" 3405 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" 3406 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; 3407 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); 3408 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow 3409 #if defined(__MINGW32__) && ! defined(__MINGW64__) 3410 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB 3411 // Can we make a big allocation? 3412 for (int i = 1; i <= 2; i++) { 3413 void *const big = malloc(maxbuf); 3414 if (big != NULL) { 3415 free(big); 3416 break; 3417 } 3418 // The big allocation failed. Let's be a little lenient. 3419 maxbuf = maxbuf / 2; 3420 fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf); 3421 } 3422 #endif 3423 3424 for (int i = 0; i < num_prefixes; ++i) { 3425 set_subtest("\"%s\"", prefixes[i]); 3426 XML_Parser parser = XML_ParserCreate(NULL); 3427 #if XML_GE == 1 3428 assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1) 3429 == XML_TRUE); // i.e. deactivate 3430 #endif 3431 const int prefix_len = (int)strlen(prefixes[i]); 3432 const enum XML_Status s 3433 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); 3434 if (s != XML_STATUS_OK) 3435 xml_failure(parser); 3436 3437 // XML_CONTEXT_BYTES of the prefix may remain in the buffer; 3438 // subtracting the whole prefix is easiest, and close enough. 3439 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); 3440 // The limit should be consistent; no prefix should allow us to 3441 // reach above the max buffer size. 3442 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); 3443 XML_ParserFree(parser); 3444 } 3445 } 3446 END_TEST 3447 3448 START_TEST(test_getbuffer_allocates_on_zero_len) { 3449 for (int first_len = 1; first_len >= 0; first_len--) { 3450 set_subtest("with len=%d first", first_len); 3451 XML_Parser parser = XML_ParserCreate(NULL); 3452 assert_true(parser != NULL); 3453 assert_true(XML_GetBuffer(parser, first_len) != NULL); 3454 assert_true(XML_GetBuffer(parser, 0) != NULL); 3455 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) 3456 xml_failure(parser); 3457 XML_ParserFree(parser); 3458 } 3459 } 3460 END_TEST 3461 3462 /* Test position information macros */ 3463 START_TEST(test_byte_info_at_end) { 3464 const char *text = "<doc></doc>"; 3465 3466 if (XML_GetCurrentByteIndex(g_parser) != -1 3467 || XML_GetCurrentByteCount(g_parser) != 0) 3468 fail("Byte index/count incorrect at start of parse"); 3469 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3470 == XML_STATUS_ERROR) 3471 xml_failure(g_parser); 3472 /* At end, the count will be zero and the index the end of string */ 3473 if (XML_GetCurrentByteCount(g_parser) != 0) 3474 fail("Terminal byte count incorrect"); 3475 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text)) 3476 fail("Terminal byte index incorrect"); 3477 } 3478 END_TEST 3479 3480 /* Test position information from errors */ 3481 #define PRE_ERROR_STR "<doc></" 3482 #define POST_ERROR_STR "wombat></doc>" 3483 START_TEST(test_byte_info_at_error) { 3484 const char *text = PRE_ERROR_STR POST_ERROR_STR; 3485 3486 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3487 == XML_STATUS_OK) 3488 fail("Syntax error not faulted"); 3489 if (XML_GetCurrentByteCount(g_parser) != 0) 3490 fail("Error byte count incorrect"); 3491 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR)) 3492 fail("Error byte index incorrect"); 3493 } 3494 END_TEST 3495 #undef PRE_ERROR_STR 3496 #undef POST_ERROR_STR 3497 3498 /* Test position information in handler */ 3499 #define START_ELEMENT "<e>" 3500 #define CDATA_TEXT "Hello" 3501 #define END_ELEMENT "</e>" 3502 START_TEST(test_byte_info_at_cdata) { 3503 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; 3504 int offset, size; 3505 ByteTestData data; 3506 3507 /* Check initial context is empty */ 3508 if (XML_GetInputContext(g_parser, &offset, &size) != NULL) 3509 fail("Unexpected context at start of parse"); 3510 3511 data.start_element_len = (int)strlen(START_ELEMENT); 3512 data.cdata_len = (int)strlen(CDATA_TEXT); 3513 data.total_string_len = (int)strlen(text); 3514 XML_SetCharacterDataHandler(g_parser, byte_character_handler); 3515 XML_SetUserData(g_parser, &data); 3516 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 3517 xml_failure(g_parser); 3518 } 3519 END_TEST 3520 #undef START_ELEMENT 3521 #undef CDATA_TEXT 3522 #undef END_ELEMENT 3523 3524 /* Test predefined entities are correctly recognised */ 3525 START_TEST(test_predefined_entities) { 3526 const char *text = "<doc><>&"'</doc>"; 3527 const XML_Char *expected = XCS("<doc><>&"'</doc>"); 3528 const XML_Char *result = XCS("<>&\"'"); 3529 CharData storage; 3530 3531 XML_SetDefaultHandler(g_parser, accumulate_characters); 3532 /* run_character_check uses XML_SetCharacterDataHandler(), which 3533 * unfortunately heads off a code path that we need to exercise. 3534 */ 3535 CharData_Init(&storage); 3536 XML_SetUserData(g_parser, &storage); 3537 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3538 == XML_STATUS_ERROR) 3539 xml_failure(g_parser); 3540 /* The default handler doesn't translate the entities */ 3541 CharData_CheckXMLChars(&storage, expected); 3542 3543 /* Now try again and check the translation */ 3544 XML_ParserReset(g_parser, NULL); 3545 run_character_check(text, result); 3546 } 3547 END_TEST 3548 3549 /* Regression test that an invalid tag in an external parameter 3550 * reference in an external DTD is correctly faulted. 3551 * 3552 * Only a few specific tags are legal in DTDs ignoring comments and 3553 * processing instructions, all of which begin with an exclamation 3554 * mark. "<el/>" is not one of them, so the parser should raise an 3555 * error on encountering it. 3556 */ 3557 START_TEST(test_invalid_tag_in_dtd) { 3558 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3559 "<doc></doc>\n"; 3560 3561 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3562 XML_SetExternalEntityRefHandler(g_parser, external_entity_param); 3563 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3564 "Invalid tag IN DTD external param not rejected"); 3565 } 3566 END_TEST 3567 3568 /* Test entities not quite the predefined ones are not mis-recognised */ 3569 START_TEST(test_not_predefined_entities) { 3570 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>", 3571 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL}; 3572 int i = 0; 3573 3574 while (text[i] != NULL) { 3575 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, 3576 "Undefined entity not rejected"); 3577 XML_ParserReset(g_parser, NULL); 3578 i++; 3579 } 3580 } 3581 END_TEST 3582 3583 /* Test conditional inclusion (IGNORE) */ 3584 START_TEST(test_ignore_section) { 3585 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3586 "<doc><e>&entity;</e></doc>"; 3587 const XML_Char *expected 3588 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;"); 3589 CharData storage; 3590 3591 CharData_Init(&storage); 3592 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3593 XML_SetUserData(g_parser, &storage); 3594 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore); 3595 XML_SetDefaultHandler(g_parser, accumulate_characters); 3596 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3597 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3598 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3599 XML_SetStartElementHandler(g_parser, dummy_start_element); 3600 XML_SetEndElementHandler(g_parser, dummy_end_element); 3601 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3602 == XML_STATUS_ERROR) 3603 xml_failure(g_parser); 3604 CharData_CheckXMLChars(&storage, expected); 3605 } 3606 END_TEST 3607 3608 START_TEST(test_ignore_section_utf16) { 3609 const char text[] = 3610 /* <!DOCTYPE d SYSTEM 's'> */ 3611 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3612 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" 3613 /* <d><e>&en;</e></d> */ 3614 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; 3615 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3616 CharData storage; 3617 3618 CharData_Init(&storage); 3619 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3620 XML_SetUserData(g_parser, &storage); 3621 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16); 3622 XML_SetDefaultHandler(g_parser, accumulate_characters); 3623 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3624 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3625 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3626 XML_SetStartElementHandler(g_parser, dummy_start_element); 3627 XML_SetEndElementHandler(g_parser, dummy_end_element); 3628 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3629 == XML_STATUS_ERROR) 3630 xml_failure(g_parser); 3631 CharData_CheckXMLChars(&storage, expected); 3632 } 3633 END_TEST 3634 3635 START_TEST(test_ignore_section_utf16_be) { 3636 const char text[] = 3637 /* <!DOCTYPE d SYSTEM 's'> */ 3638 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3639 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" 3640 /* <d><e>&en;</e></d> */ 3641 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; 3642 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3643 CharData storage; 3644 3645 CharData_Init(&storage); 3646 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3647 XML_SetUserData(g_parser, &storage); 3648 XML_SetExternalEntityRefHandler(g_parser, 3649 external_entity_load_ignore_utf16_be); 3650 XML_SetDefaultHandler(g_parser, accumulate_characters); 3651 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3652 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3653 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3654 XML_SetStartElementHandler(g_parser, dummy_start_element); 3655 XML_SetEndElementHandler(g_parser, dummy_end_element); 3656 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3657 == XML_STATUS_ERROR) 3658 xml_failure(g_parser); 3659 CharData_CheckXMLChars(&storage, expected); 3660 } 3661 END_TEST 3662 3663 /* Test mis-formatted conditional exclusion */ 3664 START_TEST(test_bad_ignore_section) { 3665 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3666 "<doc><e>&entity;</e></doc>"; 3667 ExtFaults faults[] 3668 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL, 3669 XML_ERROR_SYNTAX}, 3670 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL, 3671 XML_ERROR_INVALID_TOKEN}, 3672 {/* FIrst two bytes of a three-byte char */ 3673 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL, 3674 XML_ERROR_PARTIAL_CHAR}, 3675 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3676 ExtFaults *fault; 3677 3678 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 3679 set_subtest("%s", fault->parse_text); 3680 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3681 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3682 XML_SetUserData(g_parser, fault); 3683 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3684 "Incomplete IGNORE section not failed"); 3685 XML_ParserReset(g_parser, NULL); 3686 } 3687 } 3688 END_TEST 3689 3690 struct bom_testdata { 3691 const char *external; 3692 int split; 3693 XML_Bool nested_callback_happened; 3694 }; 3695 3696 static int XMLCALL 3697 external_bom_checker(XML_Parser parser, const XML_Char *context, 3698 const XML_Char *base, const XML_Char *systemId, 3699 const XML_Char *publicId) { 3700 const char *text; 3701 UNUSED_P(base); 3702 UNUSED_P(systemId); 3703 UNUSED_P(publicId); 3704 3705 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); 3706 if (ext_parser == NULL) 3707 fail("Could not create external entity parser"); 3708 3709 if (! xcstrcmp(systemId, XCS("004-2.ent"))) { 3710 struct bom_testdata *const testdata = XML_GetUserData(parser); 3711 const char *const external = testdata->external; 3712 const int split = testdata->split; 3713 testdata->nested_callback_happened = XML_TRUE; 3714 3715 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) 3716 != XML_STATUS_OK) { 3717 xml_failure(ext_parser); 3718 } 3719 text = external + split; // the parse below will continue where we left off. 3720 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) { 3721 text = "<!ELEMENT doc EMPTY>\n" 3722 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" 3723 "<!ENTITY % e2 '%e1;'>\n"; 3724 } else { 3725 fail("unknown systemId"); 3726 } 3727 3728 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) 3729 != XML_STATUS_OK) 3730 xml_failure(ext_parser); 3731 3732 XML_ParserFree(ext_parser); 3733 return XML_STATUS_OK; 3734 } 3735 3736 /* regression test: BOM should be consumed when followed by a partial token. */ 3737 START_TEST(test_external_bom_consumed) { 3738 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3739 "<doc></doc>\n"; 3740 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; 3741 const int len = (int)strlen(external); 3742 for (int split = 0; split <= len; ++split) { 3743 set_subtest("split at byte %d", split); 3744 3745 struct bom_testdata testdata; 3746 testdata.external = external; 3747 testdata.split = split; 3748 testdata.nested_callback_happened = XML_FALSE; 3749 3750 XML_Parser parser = XML_ParserCreate(NULL); 3751 if (parser == NULL) { 3752 fail("Couldn't create parser"); 3753 } 3754 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3755 XML_SetExternalEntityRefHandler(parser, external_bom_checker); 3756 XML_SetUserData(parser, &testdata); 3757 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 3758 == XML_STATUS_ERROR) 3759 xml_failure(parser); 3760 if (! testdata.nested_callback_happened) { 3761 fail("ref handler not called"); 3762 } 3763 XML_ParserFree(parser); 3764 } 3765 } 3766 END_TEST 3767 3768 /* Test recursive parsing */ 3769 START_TEST(test_external_entity_values) { 3770 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3771 "<doc></doc>\n"; 3772 ExtFaults data_004_2[] = { 3773 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE}, 3774 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL, 3775 XML_ERROR_INVALID_TOKEN}, 3776 {"'wombat", "Unterminated string not faulted", NULL, 3777 XML_ERROR_UNCLOSED_TOKEN}, 3778 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL, 3779 XML_ERROR_PARTIAL_CHAR}, 3780 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE}, 3781 {"<?xml?>", "Malformed XML declaration not faulted", NULL, 3782 XML_ERROR_XML_DECL}, 3783 {/* UTF-8 BOM */ 3784 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, 3785 XML_ERROR_NONE}, 3786 {"<?xml version='1.0' encoding='utf-8'?>\n$", 3787 "Invalid token after text declaration not faulted", NULL, 3788 XML_ERROR_INVALID_TOKEN}, 3789 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat", 3790 "Unterminated string after text decl not faulted", NULL, 3791 XML_ERROR_UNCLOSED_TOKEN}, 3792 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82", 3793 "Partial UTF-8 character after text decl not faulted", NULL, 3794 XML_ERROR_PARTIAL_CHAR}, 3795 {"%e1;", "Recursive parameter entity not faulted", NULL, 3796 XML_ERROR_RECURSIVE_ENTITY_REF}, 3797 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3798 int i; 3799 3800 for (i = 0; data_004_2[i].parse_text != NULL; i++) { 3801 set_subtest("%s", data_004_2[i].parse_text); 3802 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3803 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); 3804 XML_SetUserData(g_parser, &data_004_2[i]); 3805 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3806 == XML_STATUS_ERROR) 3807 xml_failure(g_parser); 3808 XML_ParserReset(g_parser, NULL); 3809 } 3810 } 3811 END_TEST 3812 3813 /* Test the recursive parse interacts with a not standalone handler */ 3814 START_TEST(test_ext_entity_not_standalone) { 3815 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3816 "<doc></doc>"; 3817 3818 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3819 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone); 3820 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3821 "Standalone rejection not caught"); 3822 } 3823 END_TEST 3824 3825 START_TEST(test_ext_entity_value_abort) { 3826 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3827 "<doc></doc>\n"; 3828 3829 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3830 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter); 3831 g_resumable = XML_FALSE; 3832 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3833 == XML_STATUS_ERROR) 3834 xml_failure(g_parser); 3835 } 3836 END_TEST 3837 3838 START_TEST(test_bad_public_doctype) { 3839 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 3840 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n" 3841 "<doc></doc>"; 3842 3843 /* Setting a handler provokes a particular code path */ 3844 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, 3845 dummy_end_doctype_handler); 3846 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); 3847 } 3848 END_TEST 3849 3850 /* Test based on ibm/valid/P32/ibm32v04.xml */ 3851 START_TEST(test_attribute_enum_value) { 3852 const char *text = "<?xml version='1.0' standalone='no'?>\n" 3853 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" 3854 "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; 3855 ExtTest dtd_data 3856 = {"<!ELEMENT animal (#PCDATA|a)*>\n" 3857 "<!ELEMENT a EMPTY>\n" 3858 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>", 3859 NULL, NULL}; 3860 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger"); 3861 3862 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3863 XML_SetUserData(g_parser, &dtd_data); 3864 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3865 /* An attribute list handler provokes a different code path */ 3866 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 3867 run_ext_character_check(text, &dtd_data, expected); 3868 } 3869 END_TEST 3870 3871 /* Slightly bizarrely, the library seems to silently ignore entity 3872 * definitions for predefined entities, even when they are wrong. The 3873 * language of the XML 1.0 spec is somewhat unhelpful as to what ought 3874 * to happen, so this is currently treated as acceptable. 3875 */ 3876 START_TEST(test_predefined_entity_redefinition) { 3877 const char *text = "<!DOCTYPE doc [\n" 3878 "<!ENTITY apos 'foo'>\n" 3879 "]>\n" 3880 "<doc>'</doc>"; 3881 run_character_check(text, XCS("'")); 3882 } 3883 END_TEST 3884 3885 /* Test that the parser stops processing the DTD after an unresolved 3886 * parameter entity is encountered. 3887 */ 3888 START_TEST(test_dtd_stop_processing) { 3889 const char *text = "<!DOCTYPE doc [\n" 3890 "%foo;\n" 3891 "<!ENTITY bar 'bas'>\n" 3892 "]><doc/>"; 3893 3894 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 3895 init_dummy_handlers(); 3896 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3897 == XML_STATUS_ERROR) 3898 xml_failure(g_parser); 3899 if (get_dummy_handler_flags() != 0) 3900 fail("DTD processing still going after undefined PE"); 3901 } 3902 END_TEST 3903 3904 /* Test public notations with no system ID */ 3905 START_TEST(test_public_notation_no_sysid) { 3906 const char *text = "<!DOCTYPE doc [\n" 3907 "<!NOTATION note PUBLIC 'foo'>\n" 3908 "<!ELEMENT doc EMPTY>\n" 3909 "]>\n<doc/>"; 3910 3911 init_dummy_handlers(); 3912 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 3913 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3914 == XML_STATUS_ERROR) 3915 xml_failure(g_parser); 3916 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG) 3917 fail("Notation declaration handler not called"); 3918 } 3919 END_TEST 3920 3921 START_TEST(test_nested_groups) { 3922 const char *text 3923 = "<!DOCTYPE doc [\n" 3924 "<!ELEMENT doc " 3925 /* Sixteen elements per line */ 3926 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," 3927 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" 3928 "))))))))))))))))))))))))))))))))>\n" 3929 "<!ELEMENT e EMPTY>" 3930 "]>\n" 3931 "<doc><e/></doc>"; 3932 CharData storage; 3933 3934 CharData_Init(&storage); 3935 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3936 XML_SetStartElementHandler(g_parser, record_element_start_handler); 3937 XML_SetUserData(g_parser, &storage); 3938 init_dummy_handlers(); 3939 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3940 == XML_STATUS_ERROR) 3941 xml_failure(g_parser); 3942 CharData_CheckXMLChars(&storage, XCS("doce")); 3943 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3944 fail("Element handler not fired"); 3945 } 3946 END_TEST 3947 3948 START_TEST(test_group_choice) { 3949 const char *text = "<!DOCTYPE doc [\n" 3950 "<!ELEMENT doc (a|b|c)+>\n" 3951 "<!ELEMENT a EMPTY>\n" 3952 "<!ELEMENT b (#PCDATA)>\n" 3953 "<!ELEMENT c ANY>\n" 3954 "]>\n" 3955 "<doc>\n" 3956 "<a/>\n" 3957 "<b attr='foo'>This is a foo</b>\n" 3958 "<c></c>\n" 3959 "</doc>\n"; 3960 3961 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3962 init_dummy_handlers(); 3963 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3964 == XML_STATUS_ERROR) 3965 xml_failure(g_parser); 3966 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3967 fail("Element handler flag not raised"); 3968 } 3969 END_TEST 3970 3971 START_TEST(test_standalone_parameter_entity) { 3972 const char *text = "<?xml version='1.0' standalone='yes'?>\n" 3973 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n" 3974 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n" 3975 "%entity;\n" 3976 "]>\n" 3977 "<doc></doc>"; 3978 char dtd_data[] = "<!ENTITY % e1 'foo'>\n"; 3979 3980 XML_SetUserData(g_parser, dtd_data); 3981 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3982 XML_SetExternalEntityRefHandler(g_parser, external_entity_public); 3983 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3984 == XML_STATUS_ERROR) 3985 xml_failure(g_parser); 3986 } 3987 END_TEST 3988 3989 /* Test skipping of parameter entity in an external DTD */ 3990 /* Derived from ibm/invalid/P69/ibm69i01.xml */ 3991 START_TEST(test_skipped_parameter_entity) { 3992 const char *text = "<?xml version='1.0'?>\n" 3993 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3994 "<!ELEMENT root (#PCDATA|a)* >\n" 3995 "]>\n" 3996 "<root></root>"; 3997 ExtTest dtd_data = {"%pe2;", NULL, NULL}; 3998 3999 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4000 XML_SetUserData(g_parser, &dtd_data); 4001 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4002 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler); 4003 init_dummy_handlers(); 4004 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4005 == XML_STATUS_ERROR) 4006 xml_failure(g_parser); 4007 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG) 4008 fail("Skip handler not executed"); 4009 } 4010 END_TEST 4011 4012 /* Test recursive parameter entity definition rejected in external DTD */ 4013 START_TEST(test_recursive_external_parameter_entity) { 4014 const char *text = "<?xml version='1.0'?>\n" 4015 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 4016 "<!ELEMENT root (#PCDATA|a)* >\n" 4017 "]>\n" 4018 "<root></root>"; 4019 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;", 4020 "Recursive external parameter entity not faulted", NULL, 4021 XML_ERROR_RECURSIVE_ENTITY_REF}; 4022 4023 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 4024 XML_SetUserData(g_parser, &dtd_data); 4025 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4026 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 4027 "Recursive external parameter not spotted"); 4028 } 4029 END_TEST 4030 4031 /* Test undefined parameter entity in external entity handler */ 4032 START_TEST(test_undefined_ext_entity_in_external_dtd) { 4033 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 4034 "<doc></doc>\n"; 4035 4036 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4037 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 4038 XML_SetUserData(g_parser, NULL); 4039 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4040 == XML_STATUS_ERROR) 4041 xml_failure(g_parser); 4042 4043 /* Now repeat without the external entity ref handler invoking 4044 * another copy of itself. 4045 */ 4046 XML_ParserReset(g_parser, NULL); 4047 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4048 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 4049 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */ 4050 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4051 == XML_STATUS_ERROR) 4052 xml_failure(g_parser); 4053 } 4054 END_TEST 4055 4056 /* Test suspending the parse on receiving an XML declaration works */ 4057 START_TEST(test_suspend_xdecl) { 4058 const char *text = long_character_data_text; 4059 4060 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler); 4061 XML_SetUserData(g_parser, g_parser); 4062 g_resumable = XML_TRUE; 4063 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 4064 // we won't know exactly how much input we actually managed to give Expat. 4065 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4066 != XML_STATUS_SUSPENDED) 4067 xml_failure(g_parser); 4068 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 4069 xml_failure(g_parser); 4070 /* Attempt to start a new parse while suspended */ 4071 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4072 != XML_STATUS_ERROR) 4073 fail("Attempt to parse while suspended not faulted"); 4074 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 4075 fail("Suspended parse not faulted with correct error"); 4076 } 4077 END_TEST 4078 4079 /* Test aborting the parse in an epilog works */ 4080 START_TEST(test_abort_epilog) { 4081 const char *text = "<doc></doc>\n\r\n"; 4082 XML_Char trigger_char = XCS('\r'); 4083 4084 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 4085 XML_SetUserData(g_parser, &trigger_char); 4086 g_resumable = XML_FALSE; 4087 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4088 != XML_STATUS_ERROR) 4089 fail("Abort not triggered"); 4090 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 4091 xml_failure(g_parser); 4092 } 4093 END_TEST 4094 4095 /* Test a different code path for abort in the epilog */ 4096 START_TEST(test_abort_epilog_2) { 4097 const char *text = "<doc></doc>\n"; 4098 XML_Char trigger_char = XCS('\n'); 4099 4100 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 4101 XML_SetUserData(g_parser, &trigger_char); 4102 g_resumable = XML_FALSE; 4103 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); 4104 } 4105 END_TEST 4106 4107 /* Test suspension from the epilog */ 4108 START_TEST(test_suspend_epilog) { 4109 const char *text = "<doc></doc>\n"; 4110 XML_Char trigger_char = XCS('\n'); 4111 4112 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 4113 XML_SetUserData(g_parser, &trigger_char); 4114 g_resumable = XML_TRUE; 4115 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4116 != XML_STATUS_SUSPENDED) 4117 xml_failure(g_parser); 4118 } 4119 END_TEST 4120 4121 START_TEST(test_suspend_in_sole_empty_tag) { 4122 const char *text = "<doc/>"; 4123 enum XML_Status rc; 4124 4125 XML_SetEndElementHandler(g_parser, suspending_end_handler); 4126 XML_SetUserData(g_parser, g_parser); 4127 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 4128 if (rc == XML_STATUS_ERROR) 4129 xml_failure(g_parser); 4130 else if (rc != XML_STATUS_SUSPENDED) 4131 fail("Suspend not triggered"); 4132 rc = XML_ResumeParser(g_parser); 4133 if (rc == XML_STATUS_ERROR) 4134 xml_failure(g_parser); 4135 else if (rc != XML_STATUS_OK) 4136 fail("Resume failed"); 4137 } 4138 END_TEST 4139 4140 START_TEST(test_unfinished_epilog) { 4141 const char *text = "<doc></doc><"; 4142 4143 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, 4144 "Incomplete epilog entry not faulted"); 4145 } 4146 END_TEST 4147 4148 START_TEST(test_partial_char_in_epilog) { 4149 const char *text = "<doc></doc>\xe2\x82"; 4150 4151 /* First check that no fault is raised if the parse is not finished */ 4152 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 4153 == XML_STATUS_ERROR) 4154 xml_failure(g_parser); 4155 /* Now check that it is faulted once we finish */ 4156 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR) 4157 fail("Partial character in epilog not faulted"); 4158 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR) 4159 xml_failure(g_parser); 4160 } 4161 END_TEST 4162 4163 /* Test resuming a parse suspended in entity substitution */ 4164 START_TEST(test_suspend_resume_internal_entity) { 4165 const char *text 4166 = "<!DOCTYPE doc [\n" 4167 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n" 4168 "]>\n" 4169 "<doc>&foo;</doc>\n"; 4170 const XML_Char *expected1 = XCS("Hi"); 4171 const XML_Char *expected2 = XCS("HiHo"); 4172 CharData storage; 4173 4174 CharData_Init(&storage); 4175 XML_SetStartElementHandler(g_parser, start_element_suspender); 4176 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 4177 XML_SetUserData(g_parser, &storage); 4178 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 4179 // we won't know exactly how much input we actually managed to give Expat. 4180 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4181 != XML_STATUS_SUSPENDED) 4182 xml_failure(g_parser); 4183 CharData_CheckXMLChars(&storage, XCS("")); 4184 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 4185 xml_failure(g_parser); 4186 CharData_CheckXMLChars(&storage, expected1); 4187 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 4188 xml_failure(g_parser); 4189 CharData_CheckXMLChars(&storage, expected2); 4190 } 4191 END_TEST 4192 4193 START_TEST(test_suspend_resume_internal_entity_issue_629) { 4194 const char *const text 4195 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n" 4196 "<" 4197 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4198 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4199 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4200 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4201 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4202 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4203 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4204 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4205 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4206 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4207 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4208 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4209 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4210 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4211 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4212 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4213 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4214 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4215 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4216 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4217 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4218 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4219 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4220 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4221 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4222 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4223 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4224 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4225 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4226 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4227 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4228 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4229 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4230 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4231 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4232 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4233 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4234 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4235 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4236 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 4237 "/>" 4238 "</b></a>"; 4239 const size_t firstChunkSizeBytes = 54; 4240 4241 XML_Parser parser = XML_ParserCreate(NULL); 4242 XML_SetUserData(parser, parser); 4243 XML_SetCommentHandler(parser, suspending_comment_handler); 4244 4245 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE) 4246 != XML_STATUS_SUSPENDED) 4247 xml_failure(parser); 4248 if (XML_ResumeParser(parser) != XML_STATUS_OK) 4249 xml_failure(parser); 4250 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes, 4251 (int)(strlen(text) - firstChunkSizeBytes), 4252 XML_TRUE) 4253 != XML_STATUS_OK) 4254 xml_failure(parser); 4255 XML_ParserFree(parser); 4256 } 4257 END_TEST 4258 4259 /* Test syntax error is caught at parse resumption */ 4260 START_TEST(test_resume_entity_with_syntax_error) { 4261 if (g_chunkSize != 0) { 4262 // this test does not use SINGLE_BYTES, because of suspension 4263 return; 4264 } 4265 4266 const char *text = "<!DOCTYPE doc [\n" 4267 "<!ENTITY foo '<suspend>Hi</wombat>'>\n" 4268 "]>\n" 4269 "<doc>&foo;</doc>\n"; 4270 4271 XML_SetStartElementHandler(g_parser, start_element_suspender); 4272 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 4273 // we won't know exactly how much input we actually managed to give Expat. 4274 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4275 != XML_STATUS_SUSPENDED) 4276 xml_failure(g_parser); 4277 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) 4278 fail("Syntax error in entity not faulted"); 4279 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH) 4280 xml_failure(g_parser); 4281 } 4282 END_TEST 4283 4284 /* Test suspending and resuming in a parameter entity substitution */ 4285 START_TEST(test_suspend_resume_parameter_entity) { 4286 const char *text = "<!DOCTYPE doc [\n" 4287 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n" 4288 "%foo;\n" 4289 "]>\n" 4290 "<doc>Hello, world</doc>"; 4291 const XML_Char *expected = XCS("Hello, world"); 4292 CharData storage; 4293 4294 CharData_Init(&storage); 4295 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4296 XML_SetElementDeclHandler(g_parser, element_decl_suspender); 4297 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 4298 XML_SetUserData(g_parser, &storage); 4299 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4300 != XML_STATUS_SUSPENDED) 4301 xml_failure(g_parser); 4302 CharData_CheckXMLChars(&storage, XCS("")); 4303 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 4304 xml_failure(g_parser); 4305 CharData_CheckXMLChars(&storage, expected); 4306 } 4307 END_TEST 4308 4309 /* Test attempting to use parser after an error is faulted */ 4310 START_TEST(test_restart_on_error) { 4311 const char *text = "<$doc><doc></doc>"; 4312 4313 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4314 != XML_STATUS_ERROR) 4315 fail("Invalid tag name not faulted"); 4316 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 4317 xml_failure(g_parser); 4318 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 4319 fail("Restarting invalid parse not faulted"); 4320 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 4321 xml_failure(g_parser); 4322 } 4323 END_TEST 4324 4325 /* Test that angle brackets in an attribute default value are faulted */ 4326 START_TEST(test_reject_lt_in_attribute_value) { 4327 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n" 4328 "<doc></doc>"; 4329 4330 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4331 "Bad attribute default not faulted"); 4332 } 4333 END_TEST 4334 4335 START_TEST(test_reject_unfinished_param_in_att_value) { 4336 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n" 4337 "<doc></doc>"; 4338 4339 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4340 "Bad attribute default not faulted"); 4341 } 4342 END_TEST 4343 4344 START_TEST(test_trailing_cr_in_att_value) { 4345 const char *text = "<doc a='value\r'/>"; 4346 4347 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4348 == XML_STATUS_ERROR) 4349 xml_failure(g_parser); 4350 } 4351 END_TEST 4352 4353 /* Try parsing a general entity within a parameter entity in a 4354 * standalone internal DTD. Covers a corner case in the parser. 4355 */ 4356 START_TEST(test_standalone_internal_entity) { 4357 const char *text = "<?xml version='1.0' standalone='yes' ?>\n" 4358 "<!DOCTYPE doc [\n" 4359 " <!ELEMENT doc (#PCDATA)>\n" 4360 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n" 4361 " <!ENTITY ge 'AttDefaultValue'>\n" 4362 " %pe;\n" 4363 "]>\n" 4364 "<doc att2='any'/>"; 4365 4366 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4367 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4368 == XML_STATUS_ERROR) 4369 xml_failure(g_parser); 4370 } 4371 END_TEST 4372 4373 /* Test that a reference to an unknown external entity is skipped */ 4374 START_TEST(test_skipped_external_entity) { 4375 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 4376 "<doc></doc>\n"; 4377 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n" 4378 "<!ENTITY % e2 '%e1;'>\n", 4379 NULL, NULL}; 4380 4381 XML_SetUserData(g_parser, &test_data); 4382 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4383 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4384 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4385 == XML_STATUS_ERROR) 4386 xml_failure(g_parser); 4387 } 4388 END_TEST 4389 4390 /* Test a different form of unknown external entity */ 4391 START_TEST(test_skipped_null_loaded_ext_entity) { 4392 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 4393 "<doc />"; 4394 ExtHdlrData test_data 4395 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 4396 "<!ENTITY % pe2 '%pe1;'>\n" 4397 "%pe2;\n", 4398 external_entity_null_loader, NULL}; 4399 4400 XML_SetUserData(g_parser, &test_data); 4401 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4402 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 4403 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4404 == XML_STATUS_ERROR) 4405 xml_failure(g_parser); 4406 } 4407 END_TEST 4408 4409 START_TEST(test_skipped_unloaded_ext_entity) { 4410 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 4411 "<doc />"; 4412 ExtHdlrData test_data 4413 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 4414 "<!ENTITY % pe2 '%pe1;'>\n" 4415 "%pe2;\n", 4416 NULL, NULL}; 4417 4418 XML_SetUserData(g_parser, &test_data); 4419 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4420 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 4421 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4422 == XML_STATUS_ERROR) 4423 xml_failure(g_parser); 4424 } 4425 END_TEST 4426 4427 /* Test that a parameter entity value ending with a carriage return 4428 * has it translated internally into a newline. 4429 */ 4430 START_TEST(test_param_entity_with_trailing_cr) { 4431 #define PARAM_ENTITY_NAME "pe" 4432 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">" 4433 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 4434 "<doc/>"; 4435 ExtTest test_data 4436 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n" 4437 "%" PARAM_ENTITY_NAME ";\n", 4438 NULL, NULL}; 4439 4440 XML_SetUserData(g_parser, &test_data); 4441 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4442 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4443 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler); 4444 param_entity_match_init(XCS(PARAM_ENTITY_NAME), 4445 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n")); 4446 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4447 == XML_STATUS_ERROR) 4448 xml_failure(g_parser); 4449 int entity_match_flag = get_param_entity_match_flag(); 4450 if (entity_match_flag == ENTITY_MATCH_FAIL) 4451 fail("Parameter entity CR->NEWLINE conversion failed"); 4452 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) 4453 fail("Parameter entity not parsed"); 4454 } 4455 #undef PARAM_ENTITY_NAME 4456 #undef PARAM_ENTITY_CORE_VALUE 4457 END_TEST 4458 4459 START_TEST(test_invalid_character_entity) { 4460 const char *text = "<!DOCTYPE doc [\n" 4461 " <!ENTITY entity '�'>\n" 4462 "]>\n" 4463 "<doc>&entity;</doc>"; 4464 4465 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4466 "Out of range character reference not faulted"); 4467 } 4468 END_TEST 4469 4470 START_TEST(test_invalid_character_entity_2) { 4471 const char *text = "<!DOCTYPE doc [\n" 4472 " <!ENTITY entity '&#xg0;'>\n" 4473 "]>\n" 4474 "<doc>&entity;</doc>"; 4475 4476 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4477 "Out of range character reference not faulted"); 4478 } 4479 END_TEST 4480 4481 START_TEST(test_invalid_character_entity_3) { 4482 const char text[] = 4483 /* <!DOCTYPE doc [\n */ 4484 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 4485 /* U+0E04 = KHO KHWAI 4486 * U+0E08 = CHO CHAN */ 4487 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */ 4488 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " 4489 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" 4490 /* ]>\n */ 4491 "\0]\0>\0\n" 4492 /* <doc>&entity;</doc> */ 4493 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; 4494 4495 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4496 != XML_STATUS_ERROR) 4497 fail("Invalid start of entity name not faulted"); 4498 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY) 4499 xml_failure(g_parser); 4500 } 4501 END_TEST 4502 4503 START_TEST(test_invalid_character_entity_4) { 4504 const char *text = "<!DOCTYPE doc [\n" 4505 " <!ENTITY entity '�'>\n" /* = � */ 4506 "]>\n" 4507 "<doc>&entity;</doc>"; 4508 4509 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4510 "Out of range character reference not faulted"); 4511 } 4512 END_TEST 4513 4514 /* Test that processing instructions are picked up by a default handler */ 4515 START_TEST(test_pi_handled_in_default) { 4516 const char *text = "<?test processing instruction?>\n<doc/>"; 4517 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>"); 4518 CharData storage; 4519 4520 CharData_Init(&storage); 4521 XML_SetDefaultHandler(g_parser, accumulate_characters); 4522 XML_SetUserData(g_parser, &storage); 4523 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4524 == XML_STATUS_ERROR) 4525 xml_failure(g_parser); 4526 CharData_CheckXMLChars(&storage, expected); 4527 } 4528 END_TEST 4529 4530 /* Test that comments are picked up by a default handler */ 4531 START_TEST(test_comment_handled_in_default) { 4532 const char *text = "<!-- This is a comment -->\n<doc/>"; 4533 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>"); 4534 CharData storage; 4535 4536 CharData_Init(&storage); 4537 XML_SetDefaultHandler(g_parser, accumulate_characters); 4538 XML_SetUserData(g_parser, &storage); 4539 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4540 == XML_STATUS_ERROR) 4541 xml_failure(g_parser); 4542 CharData_CheckXMLChars(&storage, expected); 4543 } 4544 END_TEST 4545 4546 /* Test PIs that look almost but not quite like XML declarations */ 4547 START_TEST(test_pi_yml) { 4548 const char *text = "<?yml something like data?><doc/>"; 4549 const XML_Char *expected = XCS("yml: something like data\n"); 4550 CharData storage; 4551 4552 CharData_Init(&storage); 4553 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4554 XML_SetUserData(g_parser, &storage); 4555 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4556 == XML_STATUS_ERROR) 4557 xml_failure(g_parser); 4558 CharData_CheckXMLChars(&storage, expected); 4559 } 4560 END_TEST 4561 4562 START_TEST(test_pi_xnl) { 4563 const char *text = "<?xnl nothing like data?><doc/>"; 4564 const XML_Char *expected = XCS("xnl: nothing like data\n"); 4565 CharData storage; 4566 4567 CharData_Init(&storage); 4568 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4569 XML_SetUserData(g_parser, &storage); 4570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4571 == XML_STATUS_ERROR) 4572 xml_failure(g_parser); 4573 CharData_CheckXMLChars(&storage, expected); 4574 } 4575 END_TEST 4576 4577 START_TEST(test_pi_xmm) { 4578 const char *text = "<?xmm everything like data?><doc/>"; 4579 const XML_Char *expected = XCS("xmm: everything like data\n"); 4580 CharData storage; 4581 4582 CharData_Init(&storage); 4583 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4584 XML_SetUserData(g_parser, &storage); 4585 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4586 == XML_STATUS_ERROR) 4587 xml_failure(g_parser); 4588 CharData_CheckXMLChars(&storage, expected); 4589 } 4590 END_TEST 4591 4592 START_TEST(test_utf16_pi) { 4593 const char text[] = 4594 /* <?{KHO KHWAI}{CHO CHAN}?> 4595 * where {KHO KHWAI} = U+0E04 4596 * and {CHO CHAN} = U+0E08 4597 */ 4598 "<\0?\0\x04\x0e\x08\x0e?\0>\0" 4599 /* <q/> */ 4600 "<\0q\0/\0>\0"; 4601 #ifdef XML_UNICODE 4602 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4603 #else 4604 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4605 #endif 4606 CharData storage; 4607 4608 CharData_Init(&storage); 4609 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4610 XML_SetUserData(g_parser, &storage); 4611 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4612 == XML_STATUS_ERROR) 4613 xml_failure(g_parser); 4614 CharData_CheckXMLChars(&storage, expected); 4615 } 4616 END_TEST 4617 4618 START_TEST(test_utf16_be_pi) { 4619 const char text[] = 4620 /* <?{KHO KHWAI}{CHO CHAN}?> 4621 * where {KHO KHWAI} = U+0E04 4622 * and {CHO CHAN} = U+0E08 4623 */ 4624 "\0<\0?\x0e\x04\x0e\x08\0?\0>" 4625 /* <q/> */ 4626 "\0<\0q\0/\0>"; 4627 #ifdef XML_UNICODE 4628 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4629 #else 4630 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4631 #endif 4632 CharData storage; 4633 4634 CharData_Init(&storage); 4635 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4636 XML_SetUserData(g_parser, &storage); 4637 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4638 == XML_STATUS_ERROR) 4639 xml_failure(g_parser); 4640 CharData_CheckXMLChars(&storage, expected); 4641 } 4642 END_TEST 4643 4644 /* Test that comments can be picked up and translated */ 4645 START_TEST(test_utf16_be_comment) { 4646 const char text[] = 4647 /* <!-- Comment A --> */ 4648 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" 4649 /* <doc/> */ 4650 "\0<\0d\0o\0c\0/\0>"; 4651 const XML_Char *expected = XCS(" Comment A "); 4652 CharData storage; 4653 4654 CharData_Init(&storage); 4655 XML_SetCommentHandler(g_parser, accumulate_comment); 4656 XML_SetUserData(g_parser, &storage); 4657 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4658 == XML_STATUS_ERROR) 4659 xml_failure(g_parser); 4660 CharData_CheckXMLChars(&storage, expected); 4661 } 4662 END_TEST 4663 4664 START_TEST(test_utf16_le_comment) { 4665 const char text[] = 4666 /* <!-- Comment B --> */ 4667 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" 4668 /* <doc/> */ 4669 "<\0d\0o\0c\0/\0>\0"; 4670 const XML_Char *expected = XCS(" Comment B "); 4671 CharData storage; 4672 4673 CharData_Init(&storage); 4674 XML_SetCommentHandler(g_parser, accumulate_comment); 4675 XML_SetUserData(g_parser, &storage); 4676 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4677 == XML_STATUS_ERROR) 4678 xml_failure(g_parser); 4679 CharData_CheckXMLChars(&storage, expected); 4680 } 4681 END_TEST 4682 4683 /* Test that the unknown encoding handler with map entries that expect 4684 * conversion but no conversion function is faulted 4685 */ 4686 START_TEST(test_missing_encoding_conversion_fn) { 4687 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n" 4688 "<doc>\x81</doc>"; 4689 4690 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4691 /* MiscEncodingHandler sets up an encoding with every top-bit-set 4692 * character introducing a two-byte sequence. For this, it 4693 * requires a convert function. The above function call doesn't 4694 * pass one through, so when BadEncodingHandler actually gets 4695 * called it should supply an invalid encoding. 4696 */ 4697 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4698 "Encoding with missing convert() not faulted"); 4699 } 4700 END_TEST 4701 4702 START_TEST(test_failing_encoding_conversion_fn) { 4703 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n" 4704 "<doc>\x81</doc>"; 4705 4706 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4707 /* BadEncodingHandler sets up an encoding with every top-bit-set 4708 * character introducing a two-byte sequence. For this, it 4709 * requires a convert function. The above function call passes 4710 * one that insists all possible sequences are invalid anyway. 4711 */ 4712 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4713 "Encoding with failing convert() not faulted"); 4714 } 4715 END_TEST 4716 4717 /* Test unknown encoding conversions */ 4718 START_TEST(test_unknown_encoding_success) { 4719 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4720 /* Equivalent to <eoc>Hello, world</eoc> */ 4721 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>"; 4722 4723 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4724 run_character_check(text, XCS("Hello, world")); 4725 } 4726 END_TEST 4727 4728 /* Test bad name character in unknown encoding */ 4729 START_TEST(test_unknown_encoding_bad_name) { 4730 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4731 "<\xff\x64oc>Hello, world</\xff\x64oc>"; 4732 4733 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4734 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4735 "Bad name start in unknown encoding not faulted"); 4736 } 4737 END_TEST 4738 4739 /* Test bad mid-name character in unknown encoding */ 4740 START_TEST(test_unknown_encoding_bad_name_2) { 4741 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4742 "<d\xffoc>Hello, world</d\xffoc>"; 4743 4744 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4745 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4746 "Bad name in unknown encoding not faulted"); 4747 } 4748 END_TEST 4749 4750 /* Test element name that is long enough to fill the conversion buffer 4751 * in an unknown encoding, finishing with an encoded character. 4752 */ 4753 START_TEST(test_unknown_encoding_long_name_1) { 4754 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4755 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>" 4756 "Hi" 4757 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"; 4758 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4759 CharData storage; 4760 4761 CharData_Init(&storage); 4762 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4763 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4764 XML_SetUserData(g_parser, &storage); 4765 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4766 == XML_STATUS_ERROR) 4767 xml_failure(g_parser); 4768 CharData_CheckXMLChars(&storage, expected); 4769 } 4770 END_TEST 4771 4772 /* Test element name that is long enough to fill the conversion buffer 4773 * in an unknown encoding, finishing with an simple character. 4774 */ 4775 START_TEST(test_unknown_encoding_long_name_2) { 4776 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4777 "<abcdefghabcdefghabcdefghijklmnop>" 4778 "Hi" 4779 "</abcdefghabcdefghabcdefghijklmnop>"; 4780 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4781 CharData storage; 4782 4783 CharData_Init(&storage); 4784 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4785 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4786 XML_SetUserData(g_parser, &storage); 4787 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4788 == XML_STATUS_ERROR) 4789 xml_failure(g_parser); 4790 CharData_CheckXMLChars(&storage, expected); 4791 } 4792 END_TEST 4793 4794 START_TEST(test_invalid_unknown_encoding) { 4795 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n" 4796 "<doc>Hello world</doc>"; 4797 4798 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4799 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4800 "Invalid unknown encoding not faulted"); 4801 } 4802 END_TEST 4803 4804 START_TEST(test_unknown_ascii_encoding_ok) { 4805 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4806 "<doc>Hello, world</doc>"; 4807 4808 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4809 run_character_check(text, XCS("Hello, world")); 4810 } 4811 END_TEST 4812 4813 START_TEST(test_unknown_ascii_encoding_fail) { 4814 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4815 "<doc>Hello, \x80 world</doc>"; 4816 4817 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4818 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4819 "Invalid character not faulted"); 4820 } 4821 END_TEST 4822 4823 START_TEST(test_unknown_encoding_invalid_length) { 4824 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n" 4825 "<doc>Hello, world</doc>"; 4826 4827 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4828 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4829 "Invalid unknown encoding not faulted"); 4830 } 4831 END_TEST 4832 4833 START_TEST(test_unknown_encoding_invalid_topbit) { 4834 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n" 4835 "<doc>Hello, world</doc>"; 4836 4837 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4838 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4839 "Invalid unknown encoding not faulted"); 4840 } 4841 END_TEST 4842 4843 START_TEST(test_unknown_encoding_invalid_surrogate) { 4844 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n" 4845 "<doc>Hello, \x82 world</doc>"; 4846 4847 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4848 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4849 "Invalid unknown encoding not faulted"); 4850 } 4851 END_TEST 4852 4853 START_TEST(test_unknown_encoding_invalid_high) { 4854 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n" 4855 "<doc>Hello, world</doc>"; 4856 4857 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4858 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4859 "Invalid unknown encoding not faulted"); 4860 } 4861 END_TEST 4862 4863 START_TEST(test_unknown_encoding_invalid_attr_value) { 4864 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4865 "<doc attr='\xff\x30'/>"; 4866 4867 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4868 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4869 "Invalid attribute valid not faulted"); 4870 } 4871 END_TEST 4872 4873 START_TEST(test_unknown_encoding_user_data_primary) { 4874 // This test is based on ideas contributed by Artiphishell Inc. 4875 const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n" 4876 "<root />\n"; 4877 XML_Parser parser = XML_ParserCreate(NULL); 4878 XML_SetUnknownEncodingHandler(parser, 4879 user_data_checking_unknown_encoding_handler, 4880 (void *)(intptr_t)0xC0FFEE); 4881 4882 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 4883 == XML_STATUS_OK); 4884 4885 XML_ParserFree(parser); 4886 } 4887 END_TEST 4888 4889 START_TEST(test_unknown_encoding_user_data_secondary) { 4890 // This test is based on ideas contributed by Artiphishell Inc. 4891 const char *const text_main = "<!DOCTYPE r [\n" 4892 " <!ENTITY ext SYSTEM 'ext.ent'>\n" 4893 "]>\n" 4894 "<r>&ext;</r>\n"; 4895 const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n" 4896 "<e>data</e>"; 4897 ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL}; 4898 XML_Parser parser = XML_ParserCreate(NULL); 4899 XML_SetExternalEntityRefHandler(parser, external_entity_loader2); 4900 XML_SetUnknownEncodingHandler(parser, 4901 user_data_checking_unknown_encoding_handler, 4902 (void *)(intptr_t)0xC0FFEE); 4903 XML_SetUserData(parser, &test_data); 4904 4905 assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main), 4906 XML_TRUE) 4907 == XML_STATUS_OK); 4908 4909 XML_ParserFree(parser); 4910 } 4911 END_TEST 4912 4913 /* Test an external entity parser set to use latin-1 detects UTF-16 4914 * BOMs correctly. 4915 */ 4916 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ 4917 START_TEST(test_ext_entity_latin1_utf16le_bom) { 4918 const char *text = "<!DOCTYPE doc [\n" 4919 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4920 "]>\n" 4921 "<doc>&en;</doc>"; 4922 ExtTest2 test_data 4923 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4924 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4925 * 0x4c = L and 0x20 is a space 4926 */ 4927 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4928 #ifdef XML_UNICODE 4929 const XML_Char *expected = XCS("\x00ff\x00feL "); 4930 #else 4931 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4932 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4933 #endif 4934 CharData storage; 4935 4936 CharData_Init(&storage); 4937 test_data.storage = &storage; 4938 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4939 XML_SetUserData(g_parser, &test_data); 4940 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4941 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4942 == XML_STATUS_ERROR) 4943 xml_failure(g_parser); 4944 CharData_CheckXMLChars(&storage, expected); 4945 } 4946 END_TEST 4947 4948 START_TEST(test_ext_entity_latin1_utf16be_bom) { 4949 const char *text = "<!DOCTYPE doc [\n" 4950 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4951 "]>\n" 4952 "<doc>&en;</doc>"; 4953 ExtTest2 test_data 4954 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4955 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4956 * 0x4c = L and 0x20 is a space 4957 */ 4958 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4959 #ifdef XML_UNICODE 4960 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4961 #else 4962 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4963 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L"); 4964 #endif 4965 CharData storage; 4966 4967 CharData_Init(&storage); 4968 test_data.storage = &storage; 4969 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4970 XML_SetUserData(g_parser, &test_data); 4971 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4972 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4973 == XML_STATUS_ERROR) 4974 xml_failure(g_parser); 4975 CharData_CheckXMLChars(&storage, expected); 4976 } 4977 END_TEST 4978 4979 /* Parsing the full buffer rather than a byte at a time makes a 4980 * difference to the encoding scanning code, so repeat the above tests 4981 * without breaking them down by byte. 4982 */ 4983 START_TEST(test_ext_entity_latin1_utf16le_bom2) { 4984 const char *text = "<!DOCTYPE doc [\n" 4985 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4986 "]>\n" 4987 "<doc>&en;</doc>"; 4988 ExtTest2 test_data 4989 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4990 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4991 * 0x4c = L and 0x20 is a space 4992 */ 4993 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4994 #ifdef XML_UNICODE 4995 const XML_Char *expected = XCS("\x00ff\x00feL "); 4996 #else 4997 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4998 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4999 #endif 5000 CharData storage; 5001 5002 CharData_Init(&storage); 5003 test_data.storage = &storage; 5004 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5005 XML_SetUserData(g_parser, &test_data); 5006 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5007 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5008 == XML_STATUS_ERROR) 5009 xml_failure(g_parser); 5010 CharData_CheckXMLChars(&storage, expected); 5011 } 5012 END_TEST 5013 5014 START_TEST(test_ext_entity_latin1_utf16be_bom2) { 5015 const char *text = "<!DOCTYPE doc [\n" 5016 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 5017 "]>\n" 5018 "<doc>&en;</doc>"; 5019 ExtTest2 test_data 5020 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 5021 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 5022 * 0x4c = L and 0x20 is a space 5023 */ 5024 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 5025 #ifdef XML_UNICODE 5026 const XML_Char *expected = XCS("\x00fe\x00ff L"); 5027 #else 5028 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 5029 const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; 5030 #endif 5031 CharData storage; 5032 5033 CharData_Init(&storage); 5034 test_data.storage = &storage; 5035 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5036 XML_SetUserData(g_parser, &test_data); 5037 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5038 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5039 == XML_STATUS_ERROR) 5040 xml_failure(g_parser); 5041 CharData_CheckXMLChars(&storage, expected); 5042 } 5043 END_TEST 5044 5045 /* Test little-endian UTF-16 given an explicit big-endian encoding */ 5046 START_TEST(test_ext_entity_utf16_be) { 5047 const char *text = "<!DOCTYPE doc [\n" 5048 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 5049 "]>\n" 5050 "<doc>&en;</doc>"; 5051 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL}; 5052 #ifdef XML_UNICODE 5053 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 5054 #else 5055 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 5056 "\xe6\x94\x80" /* U+6500 */ 5057 "\xe2\xbc\x80" /* U+2F00 */ 5058 "\xe3\xb8\x80"); /* U+3E00 */ 5059 #endif 5060 CharData storage; 5061 5062 CharData_Init(&storage); 5063 test_data.storage = &storage; 5064 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5065 XML_SetUserData(g_parser, &test_data); 5066 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5067 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5068 == XML_STATUS_ERROR) 5069 xml_failure(g_parser); 5070 CharData_CheckXMLChars(&storage, expected); 5071 } 5072 END_TEST 5073 5074 /* Test big-endian UTF-16 given an explicit little-endian encoding */ 5075 START_TEST(test_ext_entity_utf16_le) { 5076 const char *text = "<!DOCTYPE doc [\n" 5077 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 5078 "]>\n" 5079 "<doc>&en;</doc>"; 5080 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL}; 5081 #ifdef XML_UNICODE 5082 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 5083 #else 5084 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 5085 "\xe6\x94\x80" /* U+6500 */ 5086 "\xe2\xbc\x80" /* U+2F00 */ 5087 "\xe3\xb8\x80"); /* U+3E00 */ 5088 #endif 5089 CharData storage; 5090 5091 CharData_Init(&storage); 5092 test_data.storage = &storage; 5093 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5094 XML_SetUserData(g_parser, &test_data); 5095 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5096 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5097 == XML_STATUS_ERROR) 5098 xml_failure(g_parser); 5099 CharData_CheckXMLChars(&storage, expected); 5100 } 5101 END_TEST 5102 5103 /* Test little-endian UTF-16 given no explicit encoding. 5104 * The existing default encoding (UTF-8) is assumed to hold without a 5105 * BOM to contradict it, so the entity value will in fact provoke an 5106 * error because 0x00 is not a valid XML character. We parse the 5107 * whole buffer in one go rather than feeding it in byte by byte to 5108 * exercise different code paths in the initial scanning routines. 5109 */ 5110 START_TEST(test_ext_entity_utf16_unknown) { 5111 const char *text = "<!DOCTYPE doc [\n" 5112 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 5113 "]>\n" 5114 "<doc>&en;</doc>"; 5115 ExtFaults2 test_data 5116 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL, 5117 XML_ERROR_INVALID_TOKEN}; 5118 5119 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2); 5120 XML_SetUserData(g_parser, &test_data); 5121 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 5122 "Invalid character should not have been accepted"); 5123 } 5124 END_TEST 5125 5126 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */ 5127 START_TEST(test_ext_entity_utf8_non_bom) { 5128 const char *text = "<!DOCTYPE doc [\n" 5129 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 5130 "]>\n" 5131 "<doc>&en;</doc>"; 5132 ExtTest2 test_data 5133 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */ 5134 3, NULL, NULL}; 5135 #ifdef XML_UNICODE 5136 const XML_Char *expected = XCS("\xfec0"); 5137 #else 5138 const XML_Char *expected = XCS("\xef\xbb\x80"); 5139 #endif 5140 CharData storage; 5141 5142 CharData_Init(&storage); 5143 test_data.storage = &storage; 5144 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5145 XML_SetUserData(g_parser, &test_data); 5146 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5147 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5148 == XML_STATUS_ERROR) 5149 xml_failure(g_parser); 5150 CharData_CheckXMLChars(&storage, expected); 5151 } 5152 END_TEST 5153 5154 /* Test that UTF-8 in a CDATA section is correctly passed through */ 5155 START_TEST(test_utf8_in_cdata_section) { 5156 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>"; 5157 #ifdef XML_UNICODE 5158 const XML_Char *expected = XCS("one \x00e9 two"); 5159 #else 5160 const XML_Char *expected = XCS("one \xc3\xa9 two"); 5161 #endif 5162 5163 run_character_check(text, expected); 5164 } 5165 END_TEST 5166 5167 /* Test that little-endian UTF-16 in a CDATA section is handled */ 5168 START_TEST(test_utf8_in_cdata_section_2) { 5169 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>"; 5170 #ifdef XML_UNICODE 5171 const XML_Char *expected = XCS("\x00e9]\x00e9two"); 5172 #else 5173 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two"); 5174 #endif 5175 5176 run_character_check(text, expected); 5177 } 5178 END_TEST 5179 5180 START_TEST(test_utf8_in_start_tags) { 5181 struct test_case { 5182 bool goodName; 5183 bool goodNameStart; 5184 const char *tagName; 5185 }; 5186 5187 // The idea with the tests below is this: 5188 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences 5189 // go to isNever and are hence not a concern. 5190 // 5191 // We start with a character that is a valid name character 5192 // (or even name-start character, see XML 1.0r4 spec) and then we flip 5193 // single bits at places where (1) the result leaves the UTF-8 encoding space 5194 // and (2) we stay in the same n-byte sequence family. 5195 // 5196 // The flipped bits are highlighted in angle brackets in comments, 5197 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped 5198 // the most significant bit to 1 to leave UTF-8 encoding space. 5199 struct test_case cases[] = { 5200 // 1-byte UTF-8: [0xxx xxxx] 5201 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' 5202 {false, false, "\xBA"}, // [<1>011 1010] 5203 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' 5204 {false, false, "\xB9"}, // [<1>011 1001] 5205 5206 // 2-byte UTF-8: [110x xxxx] [10xx xxxx] 5207 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = 5208 // Arabic small waw U+06E5 5209 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] 5210 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] 5211 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] 5212 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = 5213 // combining char U+0301 5214 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] 5215 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] 5216 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] 5217 5218 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] 5219 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = 5220 // Devanagari Letter A U+0905 5221 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] 5222 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] 5223 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] 5224 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] 5225 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] 5226 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = 5227 // combining char U+0901 5228 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] 5229 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] 5230 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] 5231 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] 5232 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] 5233 }; 5234 const bool atNameStart[] = {true, false}; 5235 5236 size_t i = 0; 5237 char doc[1024]; 5238 size_t failCount = 0; 5239 5240 // we need all the bytes to be parsed, but we don't want the errors that can 5241 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. 5242 if (g_reparseDeferralEnabledDefault) { 5243 return; 5244 } 5245 5246 for (; i < sizeof(cases) / sizeof(cases[0]); i++) { 5247 size_t j = 0; 5248 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { 5249 const bool expectedSuccess 5250 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; 5251 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a", 5252 cases[i].tagName); 5253 XML_Parser parser = XML_ParserCreate(NULL); 5254 5255 const enum XML_Status status = _XML_Parse_SINGLE_BYTES( 5256 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); 5257 5258 bool success = true; 5259 if ((status == XML_STATUS_OK) != expectedSuccess) { 5260 success = false; 5261 } 5262 if ((status == XML_STATUS_ERROR) 5263 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { 5264 success = false; 5265 } 5266 5267 if (! success) { 5268 fprintf( 5269 stderr, 5270 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", 5271 (unsigned)i + 1u, atNameStart[j] ? " " : "not ", 5272 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); 5273 failCount++; 5274 } 5275 5276 XML_ParserFree(parser); 5277 } 5278 } 5279 5280 if (failCount > 0) { 5281 fail("UTF-8 regression detected"); 5282 } 5283 } 5284 END_TEST 5285 5286 /* Test trailing spaces in elements are accepted */ 5287 START_TEST(test_trailing_spaces_in_elements) { 5288 const char *text = "<doc >Hi</doc >"; 5289 const XML_Char *expected = XCS("doc/doc"); 5290 CharData storage; 5291 5292 CharData_Init(&storage); 5293 XML_SetElementHandler(g_parser, record_element_start_handler, 5294 record_element_end_handler); 5295 XML_SetUserData(g_parser, &storage); 5296 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5297 == XML_STATUS_ERROR) 5298 xml_failure(g_parser); 5299 CharData_CheckXMLChars(&storage, expected); 5300 } 5301 END_TEST 5302 5303 START_TEST(test_utf16_attribute) { 5304 const char text[] = 5305 /* <d {KHO KHWAI}{CHO CHAN}='a'/> 5306 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5307 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5308 */ 5309 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0"; 5310 const XML_Char *expected = XCS("a"); 5311 CharData storage; 5312 5313 CharData_Init(&storage); 5314 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5315 XML_SetUserData(g_parser, &storage); 5316 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5317 == XML_STATUS_ERROR) 5318 xml_failure(g_parser); 5319 CharData_CheckXMLChars(&storage, expected); 5320 } 5321 END_TEST 5322 5323 START_TEST(test_utf16_second_attr) { 5324 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/> 5325 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5326 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5327 */ 5328 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0" 5329 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0"; 5330 const XML_Char *expected = XCS("1"); 5331 CharData storage; 5332 5333 CharData_Init(&storage); 5334 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5335 XML_SetUserData(g_parser, &storage); 5336 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5337 == XML_STATUS_ERROR) 5338 xml_failure(g_parser); 5339 CharData_CheckXMLChars(&storage, expected); 5340 } 5341 END_TEST 5342 5343 START_TEST(test_attr_after_solidus) { 5344 const char *text = "<doc attr1='a' / attr2='b'>"; 5345 5346 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted"); 5347 } 5348 END_TEST 5349 5350 START_TEST(test_utf16_pe) { 5351 /* <!DOCTYPE doc [ 5352 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'> 5353 * %{KHO KHWAI}{CHO CHAN}; 5354 * ]> 5355 * <doc></doc> 5356 * 5357 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5358 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5359 */ 5360 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 5361 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 " 5362 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 " 5363 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n" 5364 "\0%\x0e\x04\x0e\x08\0;\0\n" 5365 "\0]\0>\0\n" 5366 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>"; 5367 #ifdef XML_UNICODE 5368 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n"); 5369 #else 5370 const XML_Char *expected 5371 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n"); 5372 #endif 5373 CharData storage; 5374 5375 CharData_Init(&storage); 5376 XML_SetUserData(g_parser, &storage); 5377 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl); 5378 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5379 == XML_STATUS_ERROR) 5380 xml_failure(g_parser); 5381 CharData_CheckXMLChars(&storage, expected); 5382 } 5383 END_TEST 5384 5385 /* Test that duff attribute description keywords are rejected */ 5386 START_TEST(test_bad_attr_desc_keyword) { 5387 const char *text = "<!DOCTYPE doc [\n" 5388 " <!ATTLIST doc attr CDATA #!IMPLIED>\n" 5389 "]>\n" 5390 "<doc />"; 5391 5392 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5393 "Bad keyword !IMPLIED not faulted"); 5394 } 5395 END_TEST 5396 5397 /* Test that an invalid attribute description keyword consisting of 5398 * UTF-16 characters with their top bytes non-zero are correctly 5399 * faulted 5400 */ 5401 START_TEST(test_bad_attr_desc_keyword_utf16) { 5402 /* <!DOCTYPE d [ 5403 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}> 5404 * ]><d/> 5405 * 5406 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5407 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5408 */ 5409 const char text[] 5410 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 5411 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 " 5412 "\0#\x0e\x04\x0e\x08\0>\0\n" 5413 "\0]\0>\0<\0d\0/\0>"; 5414 5415 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5416 != XML_STATUS_ERROR) 5417 fail("Invalid UTF16 attribute keyword not faulted"); 5418 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 5419 xml_failure(g_parser); 5420 } 5421 END_TEST 5422 5423 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this 5424 * using prefix-encoding (see above) to trigger specific code paths 5425 */ 5426 START_TEST(test_bad_doctype) { 5427 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 5428 "<!DOCTYPE doc [ \x80\x44 ]><doc/>"; 5429 5430 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 5431 expect_failure(text, XML_ERROR_SYNTAX, 5432 "Invalid bytes in DOCTYPE not faulted"); 5433 } 5434 END_TEST 5435 5436 START_TEST(test_bad_doctype_utf8) { 5437 const char *text = "<!DOCTYPE \xDB\x25" 5438 "doc><doc/>"; // [1101 1011] [<0>010 0101] 5439 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5440 "Invalid UTF-8 in DOCTYPE not faulted"); 5441 } 5442 END_TEST 5443 5444 START_TEST(test_bad_doctype_utf16) { 5445 const char text[] = 5446 /* <!DOCTYPE doc [ \x06f2 ]><doc/> 5447 * 5448 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number 5449 * (name character) but not a valid letter (name start character) 5450 */ 5451 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 " 5452 "\x06\xf2" 5453 "\0 \0]\0>\0<\0d\0o\0c\0/\0>"; 5454 5455 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5456 != XML_STATUS_ERROR) 5457 fail("Invalid bytes in DOCTYPE not faulted"); 5458 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 5459 xml_failure(g_parser); 5460 } 5461 END_TEST 5462 5463 START_TEST(test_bad_doctype_plus) { 5464 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n" 5465 "<1+>&foo;</1+>"; 5466 5467 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5468 "'+' in document name not faulted"); 5469 } 5470 END_TEST 5471 5472 START_TEST(test_bad_doctype_star) { 5473 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n" 5474 "<1*>&foo;</1*>"; 5475 5476 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5477 "'*' in document name not faulted"); 5478 } 5479 END_TEST 5480 5481 START_TEST(test_bad_doctype_query) { 5482 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n" 5483 "<1?>&foo;</1?>"; 5484 5485 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5486 "'?' in document name not faulted"); 5487 } 5488 END_TEST 5489 5490 START_TEST(test_unknown_encoding_bad_ignore) { 5491 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>" 5492 "<!DOCTYPE doc SYSTEM 'foo'>" 5493 "<doc><e>&entity;</e></doc>"; 5494 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>", 5495 "Invalid character not faulted", XCS("prefix-conv"), 5496 XML_ERROR_INVALID_TOKEN}; 5497 5498 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 5499 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5500 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 5501 XML_SetUserData(g_parser, &fault); 5502 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 5503 "Bad IGNORE section with unknown encoding not failed"); 5504 } 5505 END_TEST 5506 5507 START_TEST(test_entity_in_utf16_be_attr) { 5508 const char text[] = 5509 /* <e a='ä ä'></e> */ 5510 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 " 5511 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>"; 5512 #ifdef XML_UNICODE 5513 const XML_Char *expected = XCS("\x00e4 \x00e4"); 5514 #else 5515 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5516 #endif 5517 CharData storage; 5518 5519 CharData_Init(&storage); 5520 XML_SetUserData(g_parser, &storage); 5521 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5522 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5523 == XML_STATUS_ERROR) 5524 xml_failure(g_parser); 5525 CharData_CheckXMLChars(&storage, expected); 5526 } 5527 END_TEST 5528 5529 START_TEST(test_entity_in_utf16_le_attr) { 5530 const char text[] = 5531 /* <e a='ä ä'></e> */ 5532 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0" 5533 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0"; 5534 #ifdef XML_UNICODE 5535 const XML_Char *expected = XCS("\x00e4 \x00e4"); 5536 #else 5537 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5538 #endif 5539 CharData storage; 5540 5541 CharData_Init(&storage); 5542 XML_SetUserData(g_parser, &storage); 5543 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5544 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5545 == XML_STATUS_ERROR) 5546 xml_failure(g_parser); 5547 CharData_CheckXMLChars(&storage, expected); 5548 } 5549 END_TEST 5550 5551 START_TEST(test_entity_public_utf16_be) { 5552 const char text[] = 5553 /* <!DOCTYPE d [ */ 5554 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 5555 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5556 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 " 5557 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n" 5558 /* %e; */ 5559 "\0%\0e\0;\0\n" 5560 /* ]> */ 5561 "\0]\0>\0\n" 5562 /* <d>&j;</d> */ 5563 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>"; 5564 ExtTest2 test_data 5565 = {/* <!ENTITY j 'baz'> */ 5566 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL}; 5567 const XML_Char *expected = XCS("baz"); 5568 CharData storage; 5569 5570 CharData_Init(&storage); 5571 test_data.storage = &storage; 5572 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5573 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5574 XML_SetUserData(g_parser, &test_data); 5575 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5576 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5577 == XML_STATUS_ERROR) 5578 xml_failure(g_parser); 5579 CharData_CheckXMLChars(&storage, expected); 5580 } 5581 END_TEST 5582 5583 START_TEST(test_entity_public_utf16_le) { 5584 const char text[] = 5585 /* <!DOCTYPE d [ */ 5586 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0" 5587 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5588 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0" 5589 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0" 5590 /* %e; */ 5591 "%\0e\0;\0\n\0" 5592 /* ]> */ 5593 "]\0>\0\n\0" 5594 /* <d>&j;</d> */ 5595 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0"; 5596 ExtTest2 test_data 5597 = {/* <!ENTITY j 'baz'> */ 5598 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL}; 5599 const XML_Char *expected = XCS("baz"); 5600 CharData storage; 5601 5602 CharData_Init(&storage); 5603 test_data.storage = &storage; 5604 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5605 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5606 XML_SetUserData(g_parser, &test_data); 5607 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5608 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5609 == XML_STATUS_ERROR) 5610 xml_failure(g_parser); 5611 CharData_CheckXMLChars(&storage, expected); 5612 } 5613 END_TEST 5614 5615 /* Test that a doctype with neither an internal nor external subset is 5616 * faulted 5617 */ 5618 START_TEST(test_short_doctype) { 5619 const char *text = "<!DOCTYPE doc></doc>"; 5620 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5621 "DOCTYPE without subset not rejected"); 5622 } 5623 END_TEST 5624 5625 START_TEST(test_short_doctype_2) { 5626 const char *text = "<!DOCTYPE doc PUBLIC></doc>"; 5627 expect_failure(text, XML_ERROR_SYNTAX, 5628 "DOCTYPE without Public ID not rejected"); 5629 } 5630 END_TEST 5631 5632 START_TEST(test_short_doctype_3) { 5633 const char *text = "<!DOCTYPE doc SYSTEM></doc>"; 5634 expect_failure(text, XML_ERROR_SYNTAX, 5635 "DOCTYPE without System ID not rejected"); 5636 } 5637 END_TEST 5638 5639 START_TEST(test_long_doctype) { 5640 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>"; 5641 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected"); 5642 } 5643 END_TEST 5644 5645 START_TEST(test_bad_entity) { 5646 const char *text = "<!DOCTYPE doc [\n" 5647 " <!ENTITY foo PUBLIC>\n" 5648 "]>\n" 5649 "<doc/>"; 5650 expect_failure(text, XML_ERROR_SYNTAX, 5651 "ENTITY without Public ID is not rejected"); 5652 } 5653 END_TEST 5654 5655 /* Test unquoted value is faulted */ 5656 START_TEST(test_bad_entity_2) { 5657 const char *text = "<!DOCTYPE doc [\n" 5658 " <!ENTITY % foo bar>\n" 5659 "]>\n" 5660 "<doc/>"; 5661 expect_failure(text, XML_ERROR_SYNTAX, 5662 "ENTITY without Public ID is not rejected"); 5663 } 5664 END_TEST 5665 5666 START_TEST(test_bad_entity_3) { 5667 const char *text = "<!DOCTYPE doc [\n" 5668 " <!ENTITY % foo PUBLIC>\n" 5669 "]>\n" 5670 "<doc/>"; 5671 expect_failure(text, XML_ERROR_SYNTAX, 5672 "Parameter ENTITY without Public ID is not rejected"); 5673 } 5674 END_TEST 5675 5676 START_TEST(test_bad_entity_4) { 5677 const char *text = "<!DOCTYPE doc [\n" 5678 " <!ENTITY % foo SYSTEM>\n" 5679 "]>\n" 5680 "<doc/>"; 5681 expect_failure(text, XML_ERROR_SYNTAX, 5682 "Parameter ENTITY without Public ID is not rejected"); 5683 } 5684 END_TEST 5685 5686 START_TEST(test_bad_notation) { 5687 const char *text = "<!DOCTYPE doc [\n" 5688 " <!NOTATION n SYSTEM>\n" 5689 "]>\n" 5690 "<doc/>"; 5691 expect_failure(text, XML_ERROR_SYNTAX, 5692 "Notation without System ID is not rejected"); 5693 } 5694 END_TEST 5695 5696 /* Test for issue #11, wrongly suppressed default handler */ 5697 START_TEST(test_default_doctype_handler) { 5698 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n" 5699 " <!ENTITY foo 'bar'>\n" 5700 "]>\n" 5701 "<doc>&foo;</doc>"; 5702 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE}, 5703 {XCS("'test.dtd'"), 10, XML_FALSE}, 5704 {NULL, 0, XML_FALSE}}; 5705 int i; 5706 5707 XML_SetUserData(g_parser, &test_data); 5708 XML_SetDefaultHandler(g_parser, checking_default_handler); 5709 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 5710 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5711 == XML_STATUS_ERROR) 5712 xml_failure(g_parser); 5713 for (i = 0; test_data[i].expected != NULL; i++) 5714 if (! test_data[i].seen) 5715 fail("Default handler not run for public !DOCTYPE"); 5716 } 5717 END_TEST 5718 5719 START_TEST(test_empty_element_abort) { 5720 const char *text = "<abort/>"; 5721 5722 XML_SetStartElementHandler(g_parser, start_element_suspender); 5723 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5724 != XML_STATUS_ERROR) 5725 fail("Expected to error on abort"); 5726 } 5727 END_TEST 5728 5729 /* Regression test for GH issue #612: unfinished m_declAttributeType 5730 * allocation in ->m_tempPool can corrupt following allocation. 5731 */ 5732 START_TEST(test_pool_integrity_with_unfinished_attr) { 5733 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n" 5734 "<!DOCTYPE foo [\n" 5735 "<!ELEMENT foo ANY>\n" 5736 "<!ENTITY % entp SYSTEM \"external.dtd\">\n" 5737 "%entp;\n" 5738 "]>\n" 5739 "<a></a>\n"; 5740 const XML_Char *expected = XCS("COMMENT"); 5741 CharData storage; 5742 5743 CharData_Init(&storage); 5744 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5745 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist); 5746 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 5747 XML_SetCommentHandler(g_parser, accumulate_comment); 5748 XML_SetUserData(g_parser, &storage); 5749 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5750 == XML_STATUS_ERROR) 5751 xml_failure(g_parser); 5752 CharData_CheckXMLChars(&storage, expected); 5753 } 5754 END_TEST 5755 5756 /* Test a possible early return location in internalEntityProcessor */ 5757 START_TEST(test_entity_ref_no_elements) { 5758 const char *const text = "<!DOCTYPE foo [\n" 5759 "<!ENTITY e1 \"test\">\n" 5760 "]> <foo>&e1;"; // intentionally missing newline 5761 5762 XML_Parser parser = XML_ParserCreate(NULL); 5763 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5764 == XML_STATUS_ERROR); 5765 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS); 5766 XML_ParserFree(parser); 5767 } 5768 END_TEST 5769 5770 /* Tests if chained entity references lead to unbounded recursion */ 5771 START_TEST(test_deep_nested_entity) { 5772 const size_t N_LINES = 60000; 5773 const size_t SIZE_PER_LINE = 50; 5774 5775 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE); 5776 if (text == NULL) { 5777 fail("malloc failed"); 5778 } 5779 5780 char *textPtr = text; 5781 5782 // Create the XML 5783 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5784 "<!DOCTYPE foo [\n" 5785 " <!ENTITY s0 'deepText'>\n"); 5786 5787 for (size_t i = 1; i < N_LINES; ++i) { 5788 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n", 5789 (long unsigned)i, (long unsigned)(i - 1)); 5790 } 5791 5792 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n", 5793 (long unsigned)(N_LINES - 1)); 5794 5795 const XML_Char *const expected = XCS("deepText"); 5796 5797 CharData storage; 5798 CharData_Init(&storage); 5799 5800 XML_Parser parser = XML_ParserCreate(NULL); 5801 5802 XML_SetCharacterDataHandler(parser, accumulate_characters); 5803 XML_SetUserData(parser, &storage); 5804 5805 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5806 == XML_STATUS_ERROR) 5807 xml_failure(parser); 5808 5809 CharData_CheckXMLChars(&storage, expected); 5810 XML_ParserFree(parser); 5811 free(text); 5812 } 5813 END_TEST 5814 5815 /* Tests if chained entity references in attributes 5816 lead to unbounded recursion */ 5817 START_TEST(test_deep_nested_attribute_entity) { 5818 const size_t N_LINES = 60000; 5819 const size_t SIZE_PER_LINE = 100; 5820 5821 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE); 5822 if (text == NULL) { 5823 fail("malloc failed"); 5824 } 5825 5826 char *textPtr = text; 5827 5828 // Create the XML 5829 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5830 "<!DOCTYPE foo [\n" 5831 " <!ENTITY s0 'deepText'>\n"); 5832 5833 for (size_t i = 1; i < N_LINES; ++i) { 5834 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n", 5835 (long unsigned)i, (long unsigned)(i - 1)); 5836 } 5837 5838 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n", 5839 (long unsigned)(N_LINES - 1)); 5840 5841 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}}; 5842 ElementInfo info[] 5843 = {{XCS("foo"), 1, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}}; 5844 5845 XML_Parser parser = XML_ParserCreate(NULL); 5846 ParserAndElementInfo parserPlusElemenInfo = {parser, info}; 5847 5848 XML_SetStartElementHandler(parser, counting_start_element_handler); 5849 XML_SetUserData(parser, &parserPlusElemenInfo); 5850 5851 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5852 == XML_STATUS_ERROR) 5853 xml_failure(parser); 5854 5855 XML_ParserFree(parser); 5856 free(text); 5857 } 5858 END_TEST 5859 5860 START_TEST(test_deep_nested_entity_delayed_interpretation) { 5861 const size_t N_LINES = 70000; 5862 const size_t SIZE_PER_LINE = 100; 5863 5864 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE); 5865 if (text == NULL) { 5866 fail("malloc failed"); 5867 } 5868 5869 char *textPtr = text; 5870 5871 // Create the XML 5872 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5873 "<!DOCTYPE foo [\n" 5874 " <!ENTITY %% s0 'deepText'>\n"); 5875 5876 for (size_t i = 1; i < N_LINES; ++i) { 5877 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5878 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i, 5879 (long unsigned)(i - 1)); 5880 } 5881 5882 snprintf(textPtr, SIZE_PER_LINE, 5883 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n" 5884 " %%define_g;\n" 5885 "]>\n" 5886 "<foo/>\n", 5887 (long unsigned)(N_LINES - 1)); 5888 5889 XML_Parser parser = XML_ParserCreate(NULL); 5890 5891 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5892 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5893 == XML_STATUS_ERROR) 5894 xml_failure(parser); 5895 5896 XML_ParserFree(parser); 5897 free(text); 5898 } 5899 END_TEST 5900 5901 START_TEST(test_nested_entity_suspend) { 5902 const char *const text = "<!DOCTYPE a [\n" 5903 " <!ENTITY e1 '<!--e1-->'>\n" 5904 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n" 5905 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n" 5906 "]>\n" 5907 "<a><!--start-->&e3;<!--end--></a>"; 5908 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head") 5909 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end"); 5910 CharData storage; 5911 CharData_Init(&storage); 5912 XML_Parser parser = XML_ParserCreate(NULL); 5913 ParserPlusStorage parserPlusStorage = {parser, &storage}; 5914 5915 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5916 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler); 5917 XML_SetUserData(parser, &parserPlusStorage); 5918 5919 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5920 while (status == XML_STATUS_SUSPENDED) { 5921 status = XML_ResumeParser(parser); 5922 } 5923 if (status != XML_STATUS_OK) 5924 xml_failure(parser); 5925 5926 CharData_CheckXMLChars(&storage, expected); 5927 XML_ParserFree(parser); 5928 } 5929 END_TEST 5930 5931 START_TEST(test_nested_entity_suspend_2) { 5932 const char *const text = "<!DOCTYPE doc [\n" 5933 " <!ENTITY ge1 'head1Ztail1'>\n" 5934 " <!ENTITY ge2 'head2&ge1;tail2'>\n" 5935 " <!ENTITY ge3 'head3&ge2;tail3'>\n" 5936 "]>\n" 5937 "<doc>&ge3;</doc>"; 5938 const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1") 5939 XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3"); 5940 CharData storage; 5941 CharData_Init(&storage); 5942 XML_Parser parser = XML_ParserCreate(NULL); 5943 ParserPlusStorage parserPlusStorage = {parser, &storage}; 5944 5945 XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend); 5946 XML_SetUserData(parser, &parserPlusStorage); 5947 5948 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5949 while (status == XML_STATUS_SUSPENDED) { 5950 status = XML_ResumeParser(parser); 5951 } 5952 if (status != XML_STATUS_OK) 5953 xml_failure(parser); 5954 5955 CharData_CheckXMLChars(&storage, expected); 5956 XML_ParserFree(parser); 5957 } 5958 END_TEST 5959 5960 /* Regression test for quadratic parsing on large tokens */ 5961 START_TEST(test_big_tokens_scale_linearly) { 5962 const struct { 5963 const char *pre; 5964 const char *post; 5965 } text[] = { 5966 {"<a>", "</a>"}, // assumed good, used as baseline 5967 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch 5968 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²) 5969 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²) 5970 {"<e><", "/></e>"}, // big elem name, used to be O(N²) 5971 }; 5972 const int num_cases = sizeof(text) / sizeof(text[0]); 5973 char aaaaaa[4096]; 5974 const int fillsize = (int)sizeof(aaaaaa); 5975 const int fillcount = 100; 5976 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post. 5977 const unsigned max_factor = 4; 5978 const unsigned max_scanned = max_factor * approx_bytes; 5979 5980 memset(aaaaaa, 'a', fillsize); 5981 5982 if (! g_reparseDeferralEnabledDefault) { 5983 return; // heuristic is disabled; we would get O(n^2) and fail. 5984 } 5985 5986 for (int i = 0; i < num_cases; ++i) { 5987 XML_Parser parser = XML_ParserCreate(NULL); 5988 assert_true(parser != NULL); 5989 enum XML_Status status; 5990 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post); 5991 5992 // parse the start text 5993 g_bytesScanned = 0; 5994 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, 5995 (int)strlen(text[i].pre), XML_FALSE); 5996 if (status != XML_STATUS_OK) { 5997 xml_failure(parser); 5998 } 5999 6000 // parse lots of 'a', failing the test early if it takes too long 6001 unsigned past_max_count = 0; 6002 for (int f = 0; f < fillcount; ++f) { 6003 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); 6004 if (status != XML_STATUS_OK) { 6005 xml_failure(parser); 6006 } 6007 if (g_bytesScanned > max_scanned) { 6008 // We're not done, and have already passed the limit -- the test will 6009 // definitely fail. This block allows us to save time by failing early. 6010 const unsigned pushed 6011 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize; 6012 fprintf( 6013 stderr, 6014 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 6015 f + 1, fillcount, pushed, g_bytesScanned, 6016 g_bytesScanned / (double)pushed, max_scanned, max_factor); 6017 past_max_count++; 6018 // We are failing, but allow a few log prints first. If we don't reach 6019 // a count of five, the test will fail after the loop instead. 6020 assert_true(past_max_count < 5); 6021 } 6022 } 6023 6024 // parse the end text 6025 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, 6026 (int)strlen(text[i].post), XML_TRUE); 6027 if (status != XML_STATUS_OK) { 6028 xml_failure(parser); 6029 } 6030 6031 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working 6032 if (g_bytesScanned > max_scanned) { 6033 fprintf( 6034 stderr, 6035 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 6036 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned, 6037 max_factor); 6038 fail("scanned too many bytes"); 6039 } 6040 6041 XML_ParserFree(parser); 6042 } 6043 } 6044 END_TEST 6045 6046 START_TEST(test_set_reparse_deferral) { 6047 const char *const pre = "<d>"; 6048 const char *const start = "<x attr='"; 6049 const char *const end = "'></x>"; 6050 char eeeeee[100]; 6051 const int fillsize = (int)sizeof(eeeeee); 6052 memset(eeeeee, 'e', fillsize); 6053 6054 for (int enabled = 0; enabled <= 1; enabled += 1) { 6055 set_subtest("deferral=%d", enabled); 6056 6057 XML_Parser parser = XML_ParserCreate(NULL); 6058 assert_true(parser != NULL); 6059 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 6060 // pre-grow the buffer to avoid reparsing due to almost-fullness 6061 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 6062 6063 CharData storage; 6064 CharData_Init(&storage); 6065 XML_SetUserData(parser, &storage); 6066 XML_SetStartElementHandler(parser, start_element_event_handler); 6067 6068 enum XML_Status status; 6069 // parse the start text 6070 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 6071 if (status != XML_STATUS_OK) { 6072 xml_failure(parser); 6073 } 6074 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 6075 6076 // ..and the start of the token 6077 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 6078 if (status != XML_STATUS_OK) { 6079 xml_failure(parser); 6080 } 6081 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one 6082 6083 // try to parse lots of 'e', but the token isn't finished 6084 for (int c = 0; c < 100; ++c) { 6085 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 6086 if (status != XML_STATUS_OK) { 6087 xml_failure(parser); 6088 } 6089 } 6090 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 6091 6092 // end the <x> token. 6093 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 6094 if (status != XML_STATUS_OK) { 6095 xml_failure(parser); 6096 } 6097 6098 if (enabled) { 6099 // In general, we may need to push more data to trigger a reparse attempt, 6100 // but in this test, the data is constructed to always require it. 6101 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect 6102 // 2x the token length should suffice; the +1 covers the start and end. 6103 for (int c = 0; c < 101; ++c) { 6104 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 6105 if (status != XML_STATUS_OK) { 6106 xml_failure(parser); 6107 } 6108 } 6109 } 6110 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done 6111 6112 XML_ParserFree(parser); 6113 } 6114 } 6115 END_TEST 6116 6117 struct element_decl_data { 6118 XML_Parser parser; 6119 int count; 6120 }; 6121 6122 static void 6123 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { 6124 UNUSED_P(name); 6125 struct element_decl_data *testdata = (struct element_decl_data *)userData; 6126 testdata->count += 1; 6127 XML_FreeContentModel(testdata->parser, model); 6128 } 6129 6130 static int 6131 external_inherited_parser(XML_Parser p, const XML_Char *context, 6132 const XML_Char *base, const XML_Char *systemId, 6133 const XML_Char *publicId) { 6134 UNUSED_P(base); 6135 UNUSED_P(systemId); 6136 UNUSED_P(publicId); 6137 const char *const pre = "<!ELEMENT document ANY>\n"; 6138 const char *const start = "<!ELEMENT "; 6139 const char *const end = " ANY>\n"; 6140 const char *const post = "<!ELEMENT xyz ANY>\n"; 6141 const int enabled = *(int *)XML_GetUserData(p); 6142 char eeeeee[100]; 6143 char spaces[100]; 6144 const int fillsize = (int)sizeof(eeeeee); 6145 assert_true(fillsize == (int)sizeof(spaces)); 6146 memset(eeeeee, 'e', fillsize); 6147 memset(spaces, ' ', fillsize); 6148 6149 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); 6150 assert_true(parser != NULL); 6151 // pre-grow the buffer to avoid reparsing due to almost-fullness 6152 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 6153 6154 struct element_decl_data testdata; 6155 testdata.parser = parser; 6156 testdata.count = 0; 6157 XML_SetUserData(parser, &testdata); 6158 XML_SetElementDeclHandler(parser, element_decl_counter); 6159 6160 enum XML_Status status; 6161 // parse the initial text 6162 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 6163 if (status != XML_STATUS_OK) { 6164 xml_failure(parser); 6165 } 6166 assert_true(testdata.count == 1); // first element should be done 6167 6168 // ..and the start of the big token 6169 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 6170 if (status != XML_STATUS_OK) { 6171 xml_failure(parser); 6172 } 6173 assert_true(testdata.count == 1); // still just the first one 6174 6175 // try to parse lots of 'e', but the token isn't finished 6176 for (int c = 0; c < 100; ++c) { 6177 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 6178 if (status != XML_STATUS_OK) { 6179 xml_failure(parser); 6180 } 6181 } 6182 assert_true(testdata.count == 1); // *still* just the first one 6183 6184 // end the big token. 6185 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 6186 if (status != XML_STATUS_OK) { 6187 xml_failure(parser); 6188 } 6189 6190 if (enabled) { 6191 // In general, we may need to push more data to trigger a reparse attempt, 6192 // but in this test, the data is constructed to always require it. 6193 assert_true(testdata.count == 1); // or the test is incorrect 6194 // 2x the token length should suffice; the +1 covers the start and end. 6195 for (int c = 0; c < 101; ++c) { 6196 status = XML_Parse(parser, spaces, fillsize, XML_FALSE); 6197 if (status != XML_STATUS_OK) { 6198 xml_failure(parser); 6199 } 6200 } 6201 } 6202 assert_true(testdata.count == 2); // the big token should be done 6203 6204 // parse the final text 6205 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); 6206 if (status != XML_STATUS_OK) { 6207 xml_failure(parser); 6208 } 6209 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done 6210 6211 XML_ParserFree(parser); 6212 return XML_STATUS_OK; 6213 } 6214 6215 START_TEST(test_reparse_deferral_is_inherited) { 6216 const char *const text 6217 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>"; 6218 for (int enabled = 0; enabled <= 1; ++enabled) { 6219 set_subtest("deferral=%d", enabled); 6220 6221 XML_Parser parser = XML_ParserCreate(NULL); 6222 assert_true(parser != NULL); 6223 XML_SetUserData(parser, (void *)&enabled); 6224 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 6225 // this handler creates a sub-parser and checks that its deferral behavior 6226 // is what we expected, based on the value of `enabled` (in userdata). 6227 XML_SetExternalEntityRefHandler(parser, external_inherited_parser); 6228 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 6229 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 6230 xml_failure(parser); 6231 6232 XML_ParserFree(parser); 6233 } 6234 } 6235 END_TEST 6236 6237 START_TEST(test_set_reparse_deferral_on_null_parser) { 6238 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); 6239 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); 6240 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); 6241 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); 6242 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) 6243 == XML_FALSE); 6244 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) 6245 == XML_FALSE); 6246 } 6247 END_TEST 6248 6249 START_TEST(test_set_reparse_deferral_on_the_fly) { 6250 const char *const pre = "<d><x attr='"; 6251 const char *const end = "'></x>"; 6252 char iiiiii[100]; 6253 const int fillsize = (int)sizeof(iiiiii); 6254 memset(iiiiii, 'i', fillsize); 6255 6256 XML_Parser parser = XML_ParserCreate(NULL); 6257 assert_true(parser != NULL); 6258 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); 6259 6260 CharData storage; 6261 CharData_Init(&storage); 6262 XML_SetUserData(parser, &storage); 6263 XML_SetStartElementHandler(parser, start_element_event_handler); 6264 6265 enum XML_Status status; 6266 // parse the start text 6267 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 6268 if (status != XML_STATUS_OK) { 6269 xml_failure(parser); 6270 } 6271 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 6272 6273 // try to parse some 'i', but the token isn't finished 6274 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); 6275 if (status != XML_STATUS_OK) { 6276 xml_failure(parser); 6277 } 6278 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 6279 6280 // end the <x> token. 6281 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 6282 if (status != XML_STATUS_OK) { 6283 xml_failure(parser); 6284 } 6285 CharData_CheckXMLChars(&storage, XCS("d")); // not yet. 6286 6287 // now change the heuristic setting and add *no* data 6288 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); 6289 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. 6290 status = XML_Parse(parser, "", 0, XML_FALSE); 6291 if (status != XML_STATUS_OK) { 6292 xml_failure(parser); 6293 } 6294 CharData_CheckXMLChars(&storage, XCS("dx")); 6295 6296 XML_ParserFree(parser); 6297 } 6298 END_TEST 6299 6300 START_TEST(test_set_bad_reparse_option) { 6301 XML_Parser parser = XML_ParserCreate(NULL); 6302 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); 6303 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); 6304 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); 6305 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); 6306 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); 6307 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); 6308 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); 6309 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); 6310 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); 6311 XML_ParserFree(parser); 6312 } 6313 END_TEST 6314 6315 static size_t g_totalAlloc = 0; 6316 static size_t g_biggestAlloc = 0; 6317 6318 static void * 6319 counting_realloc(void *ptr, size_t size) { 6320 g_totalAlloc += size; 6321 if (size > g_biggestAlloc) { 6322 g_biggestAlloc = size; 6323 } 6324 return realloc(ptr, size); 6325 } 6326 6327 static void * 6328 counting_malloc(size_t size) { 6329 return counting_realloc(NULL, size); 6330 } 6331 6332 START_TEST(test_bypass_heuristic_when_close_to_bufsize) { 6333 if (g_chunkSize != 0) { 6334 // this test does not use SINGLE_BYTES, because it depends on very precise 6335 // buffer fills. 6336 return; 6337 } 6338 if (! g_reparseDeferralEnabledDefault) { 6339 return; // this test is irrelevant when the deferral heuristic is disabled. 6340 } 6341 6342 const int document_length = 65536; 6343 char *const document = malloc(document_length); 6344 assert_true(document != NULL); 6345 6346 const XML_Memory_Handling_Suite memfuncs = { 6347 counting_malloc, 6348 counting_realloc, 6349 free, 6350 }; 6351 6352 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; 6353 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; 6354 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; 6355 6356 for (const int *leading = leading_list; *leading >= 0; leading++) { 6357 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { 6358 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { 6359 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken, 6360 *fillsize); 6361 // start by checking that the test looks reasonably valid 6362 assert_true(*leading + *bigtoken <= document_length); 6363 6364 // put 'x' everywhere; some will be overwritten by elements. 6365 memset(document, 'x', document_length); 6366 // maybe add an initial tag 6367 if (*leading) { 6368 assert_true(*leading >= 3); // or the test case is invalid 6369 memcpy(document, "<a>", 3); 6370 } 6371 // add the large token 6372 document[*leading + 0] = '<'; 6373 document[*leading + 1] = 'b'; 6374 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token 6375 document[*leading + *bigtoken - 1] = '>'; 6376 6377 // 1 for 'b', plus 1 or 0 depending on the presence of 'a' 6378 const int expected_elem_total = 1 + (*leading ? 1 : 0); 6379 6380 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); 6381 assert_true(parser != NULL); 6382 6383 CharData storage; 6384 CharData_Init(&storage); 6385 XML_SetUserData(parser, &storage); 6386 XML_SetStartElementHandler(parser, start_element_event_handler); 6387 6388 g_biggestAlloc = 0; 6389 g_totalAlloc = 0; 6390 int offset = 0; 6391 // fill data until the big token is covered (but not necessarily parsed) 6392 while (offset < *leading + *bigtoken) { 6393 assert_true(offset + *fillsize <= document_length); 6394 const enum XML_Status status 6395 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6396 if (status != XML_STATUS_OK) { 6397 xml_failure(parser); 6398 } 6399 offset += *fillsize; 6400 } 6401 // Now, check that we've had a buffer allocation that could fit the 6402 // context bytes and our big token. In order to detect a special case, 6403 // we need to know how many bytes of our big token were included in the 6404 // first push that contained _any_ bytes of the big token: 6405 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); 6406 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { 6407 // Special case: we aren't saving any context, and the whole big token 6408 // was covered by a single fill, so Expat may have parsed directly 6409 // from our input pointer, without allocating an internal buffer. 6410 } else if (*leading < XML_CONTEXT_BYTES) { 6411 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); 6412 } else { 6413 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); 6414 } 6415 // fill data until the big token is actually parsed 6416 while (storage.count < expected_elem_total) { 6417 const size_t alloc_before = g_totalAlloc; 6418 assert_true(offset + *fillsize <= document_length); 6419 const enum XML_Status status 6420 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6421 if (status != XML_STATUS_OK) { 6422 xml_failure(parser); 6423 } 6424 offset += *fillsize; 6425 // since all the bytes of the big token are already in the buffer, 6426 // the bufsize ceiling should make us finish its parsing without any 6427 // further buffer allocations. We assume that there will be no other 6428 // large allocations in this test. 6429 assert_true(g_totalAlloc - alloc_before < 4096); 6430 } 6431 // test-the-test: was our alloc even called? 6432 assert_true(g_totalAlloc > 0); 6433 // test-the-test: there shouldn't be any extra start elements 6434 assert_true(storage.count == expected_elem_total); 6435 6436 XML_ParserFree(parser); 6437 } 6438 } 6439 } 6440 free(document); 6441 } 6442 END_TEST 6443 6444 START_TEST(test_varying_buffer_fills) { 6445 const int KiB = 1024; 6446 const int MiB = 1024 * KiB; 6447 const int document_length = 16 * MiB; 6448 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB 6449 6450 if (g_chunkSize != 0) { 6451 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES(). 6452 } 6453 6454 char *const document = malloc(document_length); 6455 assert_true(document != NULL); 6456 memset(document, 'x', document_length); 6457 document[0] = '<'; 6458 document[1] = 't'; 6459 memset(&document[2], ' ', big - 2); // a very spacy token 6460 document[big - 1] = '>'; 6461 6462 // Each testcase is a list of buffer fill sizes, terminated by a value < 0. 6463 // When reparse deferral is enabled, the final (negated) value is the expected 6464 // maximum number of bytes scanned in parse attempts. 6465 const int testcases[][30] = { 6466 {8 * MiB, -8 * MiB}, 6467 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total 6468 // zero-size fills shouldn't trigger the bypass 6469 {4 * MiB, 0, 4 * MiB, -12 * MiB}, 6470 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, 6471 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, 6472 // try to hit the buffer ceiling only once (at the end) 6473 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, 6474 // try to hit the same buffer ceiling multiple times 6475 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, 6476 6477 // try to hit every ceiling, by always landing 1K shy of the buffer size 6478 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, 6479 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, 6480 6481 // try to avoid every ceiling, by always landing 1B past the buffer size 6482 // the normal 2x heuristic threshold still forces parse attempts. 6483 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 6484 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 6485 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 6486 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 6487 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 6488 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 6489 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 6490 -(10 * MiB + 682 * KiB + 7)}, 6491 // try to avoid every ceiling again, except on our last fill. 6492 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 6493 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 6494 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 6495 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 6496 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 6497 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 6498 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 6499 -(10 * MiB + 682 * KiB + 6)}, 6500 6501 // try to hit ceilings on the way multiple times 6502 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer 6503 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer 6504 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer 6505 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer 6506 // we'll make a parse attempt at every parse call 6507 -(45 * MiB + 12)}, 6508 }; 6509 const int testcount = sizeof(testcases) / sizeof(testcases[0]); 6510 for (int test_i = 0; test_i < testcount; test_i++) { 6511 const int *fillsize = testcases[test_i]; 6512 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1], 6513 fillsize[2], fillsize[3]); 6514 XML_Parser parser = XML_ParserCreate(NULL); 6515 assert_true(parser != NULL); 6516 6517 CharData storage; 6518 CharData_Init(&storage); 6519 XML_SetUserData(parser, &storage); 6520 XML_SetStartElementHandler(parser, start_element_event_handler); 6521 6522 g_bytesScanned = 0; 6523 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) 6524 int offset = 0; 6525 while (*fillsize >= 0) { 6526 assert_true(offset + *fillsize <= document_length); // or test is invalid 6527 const enum XML_Status status 6528 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6529 if (status != XML_STATUS_OK) { 6530 xml_failure(parser); 6531 } 6532 offset += *fillsize; 6533 fillsize++; 6534 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow 6535 worstcase_bytes += offset; // we might've tried to parse all pending bytes 6536 } 6537 assert_true(storage.count == 1); // the big token should've been parsed 6538 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? 6539 if (g_reparseDeferralEnabledDefault) { 6540 // heuristic is enabled; some XML_Parse calls may have deferred reparsing 6541 const unsigned max_bytes_scanned = -*fillsize; 6542 if (g_bytesScanned > max_bytes_scanned) { 6543 fprintf(stderr, 6544 "bytes scanned in parse attempts: actual=%u limit=%u \n", 6545 g_bytesScanned, max_bytes_scanned); 6546 fail("too many bytes scanned in parse attempts"); 6547 } 6548 } 6549 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); 6550 6551 XML_ParserFree(parser); 6552 } 6553 free(document); 6554 } 6555 END_TEST 6556 6557 START_TEST(test_empty_ext_param_entity_in_value) { 6558 const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>"; 6559 ExtOption options[] = { 6560 {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">" 6561 "<!ENTITY ge \"%pe;\">"}, 6562 {XCS("empty"), ""}, 6563 {NULL, NULL}, 6564 }; 6565 6566 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 6567 XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); 6568 XML_SetUserData(g_parser, options); 6569 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 6570 == XML_STATUS_ERROR) 6571 xml_failure(g_parser); 6572 } 6573 END_TEST 6574 6575 void 6576 make_basic_test_case(Suite *s) { 6577 TCase *tc_basic = tcase_create("basic tests"); 6578 6579 suite_add_tcase(s, tc_basic); 6580 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 6581 6582 tcase_add_test(tc_basic, test_nul_byte); 6583 tcase_add_test(tc_basic, test_u0000_char); 6584 tcase_add_test(tc_basic, test_siphash_self); 6585 tcase_add_test(tc_basic, test_siphash_spec); 6586 tcase_add_test(tc_basic, test_bom_utf8); 6587 tcase_add_test(tc_basic, test_bom_utf16_be); 6588 tcase_add_test(tc_basic, test_bom_utf16_le); 6589 tcase_add_test(tc_basic, test_nobom_utf16_le); 6590 tcase_add_test(tc_basic, test_hash_collision); 6591 tcase_add_test(tc_basic, test_hash_salt_setter); 6592 tcase_add_test(tc_basic, test_illegal_utf8); 6593 tcase_add_test(tc_basic, test_utf8_auto_align); 6594 tcase_add_test(tc_basic, test_utf16); 6595 tcase_add_test(tc_basic, test_utf16_le_epilog_newline); 6596 tcase_add_test(tc_basic, test_not_utf16); 6597 tcase_add_test(tc_basic, test_bad_encoding); 6598 tcase_add_test(tc_basic, test_latin1_umlauts); 6599 tcase_add_test(tc_basic, test_long_utf8_character); 6600 tcase_add_test(tc_basic, test_long_latin1_attribute); 6601 tcase_add_test(tc_basic, test_long_ascii_attribute); 6602 /* Regression test for SF bug #491986. */ 6603 tcase_add_test(tc_basic, test_danish_latin1); 6604 /* Regression test for SF bug #514281. */ 6605 tcase_add_test(tc_basic, test_french_charref_hexidecimal); 6606 tcase_add_test(tc_basic, test_french_charref_decimal); 6607 tcase_add_test(tc_basic, test_french_latin1); 6608 tcase_add_test(tc_basic, test_french_utf8); 6609 tcase_add_test(tc_basic, test_utf8_false_rejection); 6610 tcase_add_test(tc_basic, test_line_number_after_parse); 6611 tcase_add_test(tc_basic, test_column_number_after_parse); 6612 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); 6613 tcase_add_test(tc_basic, test_line_number_after_error); 6614 tcase_add_test(tc_basic, test_column_number_after_error); 6615 tcase_add_test(tc_basic, test_really_long_lines); 6616 tcase_add_test(tc_basic, test_really_long_encoded_lines); 6617 tcase_add_test(tc_basic, test_end_element_events); 6618 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized); 6619 tcase_add_test(tc_basic, test_attr_whitespace_normalization); 6620 tcase_add_test(tc_basic, test_xmldecl_misplaced); 6621 tcase_add_test(tc_basic, test_xmldecl_invalid); 6622 tcase_add_test(tc_basic, test_xmldecl_missing_attr); 6623 tcase_add_test(tc_basic, test_xmldecl_missing_value); 6624 tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity); 6625 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); 6626 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); 6627 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); 6628 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); 6629 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding); 6630 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2); 6631 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset); 6632 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset); 6633 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); 6634 tcase_add_test(tc_basic, 6635 test_wfc_undeclared_entity_with_external_subset_standalone); 6636 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); 6637 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); 6638 tcase_add_test(tc_basic, test_not_standalone_handler_reject); 6639 tcase_add_test(tc_basic, test_not_standalone_handler_accept); 6640 tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one); 6641 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs); 6642 tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs); 6643 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse); 6644 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling); 6645 tcase_add_test(tc_basic, test_dtd_attr_handling); 6646 tcase_add_test(tc_basic, test_empty_ns_without_namespaces); 6647 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); 6648 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); 6649 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); 6650 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); 6651 tcase_add_test(tc_basic, test_good_cdata_ascii); 6652 tcase_add_test(tc_basic, test_good_cdata_utf16); 6653 tcase_add_test(tc_basic, test_good_cdata_utf16_le); 6654 tcase_add_test(tc_basic, test_long_cdata_utf16); 6655 tcase_add_test(tc_basic, test_multichar_cdata_utf16); 6656 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); 6657 tcase_add_test(tc_basic, test_bad_cdata); 6658 tcase_add_test(tc_basic, test_bad_cdata_utf16); 6659 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); 6660 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); 6661 tcase_add_test(tc_basic, test_memory_allocation); 6662 tcase_add_test__if_xml_ge(tc_basic, test_default_current); 6663 tcase_add_test(tc_basic, test_dtd_elements); 6664 tcase_add_test(tc_basic, test_dtd_elements_nesting); 6665 tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); 6666 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); 6667 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); 6668 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype); 6669 tcase_add_test__ifdef_xml_dtd(tc_basic, 6670 test_foreign_dtd_without_external_subset); 6671 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd); 6672 tcase_add_test(tc_basic, test_set_base); 6673 tcase_add_test(tc_basic, test_attributes); 6674 tcase_add_test(tc_basic, test_duplicate_cdata_attribute); 6675 tcase_add_test(tc_basic, test_duplicate_id_attribute_1); 6676 tcase_add_test(tc_basic, test_duplicate_id_attribute_2); 6677 tcase_add_test(tc_basic, test_duplicate_cdata_attribute_multiple_attlistdecl); 6678 tcase_add_test(tc_basic, 6679 test_duplicate_cdata_attribute_multiple_attlistdecl_2); 6680 tcase_add_test(tc_basic, 6681 test_duplicate_cdata_attribute_multiple_attlistdecl_3); 6682 tcase_add_test(tc_basic, test_duplicate_id_attribute_multiple_attlistdecl); 6683 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity); 6684 tcase_add_test(tc_basic, test_resume_invalid_parse); 6685 tcase_add_test(tc_basic, test_resume_resuspended); 6686 tcase_add_test(tc_basic, test_cdata_default); 6687 tcase_add_test(tc_basic, test_subordinate_reset); 6688 tcase_add_test(tc_basic, test_subordinate_suspend); 6689 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend); 6690 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort); 6691 tcase_add_test__ifdef_xml_dtd(tc_basic, 6692 test_ext_entity_invalid_suspended_parse); 6693 tcase_add_test(tc_basic, test_explicit_encoding); 6694 tcase_add_test(tc_basic, test_trailing_cr); 6695 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr); 6696 tcase_add_test(tc_basic, test_trailing_rsqb); 6697 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb); 6698 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata); 6699 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); 6700 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); 6701 tcase_add_test(tc_basic, test_empty_parse); 6702 tcase_add_test(tc_basic, test_negative_len_parse); 6703 tcase_add_test(tc_basic, test_negative_len_parse_buffer); 6704 tcase_add_test(tc_basic, test_get_buffer_1); 6705 tcase_add_test(tc_basic, test_get_buffer_2); 6706 #if XML_CONTEXT_BYTES > 0 6707 tcase_add_test(tc_basic, test_get_buffer_3_overflow); 6708 #endif 6709 tcase_add_test(tc_basic, test_buffer_can_grow_to_max); 6710 tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); 6711 tcase_add_test(tc_basic, test_byte_info_at_end); 6712 tcase_add_test(tc_basic, test_byte_info_at_error); 6713 tcase_add_test(tc_basic, test_byte_info_at_cdata); 6714 tcase_add_test(tc_basic, test_predefined_entities); 6715 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd); 6716 tcase_add_test(tc_basic, test_not_predefined_entities); 6717 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section); 6718 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); 6719 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); 6720 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); 6721 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); 6722 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); 6723 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); 6724 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); 6725 tcase_add_test(tc_basic, test_bad_public_doctype); 6726 tcase_add_test(tc_basic, test_attribute_enum_value); 6727 tcase_add_test(tc_basic, test_predefined_entity_redefinition); 6728 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing); 6729 tcase_add_test(tc_basic, test_public_notation_no_sysid); 6730 tcase_add_test(tc_basic, test_nested_groups); 6731 tcase_add_test(tc_basic, test_group_choice); 6732 tcase_add_test(tc_basic, test_standalone_parameter_entity); 6733 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); 6734 tcase_add_test__ifdef_xml_dtd(tc_basic, 6735 test_recursive_external_parameter_entity); 6736 tcase_add_test__ifdef_xml_dtd(tc_basic, 6737 test_recursive_external_parameter_entity_2); 6738 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); 6739 tcase_add_test(tc_basic, test_suspend_xdecl); 6740 tcase_add_test(tc_basic, test_abort_epilog); 6741 tcase_add_test(tc_basic, test_abort_epilog_2); 6742 tcase_add_test(tc_basic, test_suspend_epilog); 6743 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag); 6744 tcase_add_test(tc_basic, test_unfinished_epilog); 6745 tcase_add_test(tc_basic, test_partial_char_in_epilog); 6746 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity); 6747 tcase_add_test__ifdef_xml_dtd(tc_basic, 6748 test_suspend_resume_internal_entity_issue_629); 6749 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error); 6750 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity); 6751 tcase_add_test(tc_basic, test_restart_on_error); 6752 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); 6753 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); 6754 tcase_add_test(tc_basic, test_trailing_cr_in_att_value); 6755 tcase_add_test(tc_basic, test_standalone_internal_entity); 6756 tcase_add_test(tc_basic, test_skipped_external_entity); 6757 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); 6758 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); 6759 tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr); 6760 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity); 6761 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2); 6762 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3); 6763 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4); 6764 tcase_add_test(tc_basic, test_pi_handled_in_default); 6765 tcase_add_test(tc_basic, test_comment_handled_in_default); 6766 tcase_add_test(tc_basic, test_pi_yml); 6767 tcase_add_test(tc_basic, test_pi_xnl); 6768 tcase_add_test(tc_basic, test_pi_xmm); 6769 tcase_add_test(tc_basic, test_utf16_pi); 6770 tcase_add_test(tc_basic, test_utf16_be_pi); 6771 tcase_add_test(tc_basic, test_utf16_be_comment); 6772 tcase_add_test(tc_basic, test_utf16_le_comment); 6773 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); 6774 tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); 6775 tcase_add_test(tc_basic, test_unknown_encoding_success); 6776 tcase_add_test(tc_basic, test_unknown_encoding_bad_name); 6777 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); 6778 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); 6779 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); 6780 tcase_add_test(tc_basic, test_invalid_unknown_encoding); 6781 tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); 6782 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); 6783 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); 6784 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); 6785 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); 6786 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); 6787 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); 6788 tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary); 6789 tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary); 6790 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom); 6791 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); 6792 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); 6793 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2); 6794 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be); 6795 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le); 6796 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown); 6797 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom); 6798 tcase_add_test(tc_basic, test_utf8_in_cdata_section); 6799 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); 6800 tcase_add_test(tc_basic, test_utf8_in_start_tags); 6801 tcase_add_test(tc_basic, test_trailing_spaces_in_elements); 6802 tcase_add_test(tc_basic, test_utf16_attribute); 6803 tcase_add_test(tc_basic, test_utf16_second_attr); 6804 tcase_add_test(tc_basic, test_attr_after_solidus); 6805 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe); 6806 tcase_add_test(tc_basic, test_bad_attr_desc_keyword); 6807 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); 6808 tcase_add_test(tc_basic, test_bad_doctype); 6809 tcase_add_test(tc_basic, test_bad_doctype_utf8); 6810 tcase_add_test(tc_basic, test_bad_doctype_utf16); 6811 tcase_add_test(tc_basic, test_bad_doctype_plus); 6812 tcase_add_test(tc_basic, test_bad_doctype_star); 6813 tcase_add_test(tc_basic, test_bad_doctype_query); 6814 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore); 6815 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); 6816 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); 6817 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be); 6818 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le); 6819 tcase_add_test(tc_basic, test_short_doctype); 6820 tcase_add_test(tc_basic, test_short_doctype_2); 6821 tcase_add_test(tc_basic, test_short_doctype_3); 6822 tcase_add_test(tc_basic, test_long_doctype); 6823 tcase_add_test(tc_basic, test_bad_entity); 6824 tcase_add_test(tc_basic, test_bad_entity_2); 6825 tcase_add_test(tc_basic, test_bad_entity_3); 6826 tcase_add_test(tc_basic, test_bad_entity_4); 6827 tcase_add_test(tc_basic, test_bad_notation); 6828 tcase_add_test(tc_basic, test_default_doctype_handler); 6829 tcase_add_test(tc_basic, test_empty_element_abort); 6830 tcase_add_test__ifdef_xml_dtd(tc_basic, 6831 test_pool_integrity_with_unfinished_attr); 6832 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value); 6833 tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements); 6834 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity); 6835 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity); 6836 tcase_add_test__if_xml_ge(tc_basic, 6837 test_deep_nested_entity_delayed_interpretation); 6838 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); 6839 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2); 6840 tcase_add_test(tc_basic, test_big_tokens_scale_linearly); 6841 tcase_add_test(tc_basic, test_set_reparse_deferral); 6842 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); 6843 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); 6844 tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); 6845 tcase_add_test(tc_basic, test_set_bad_reparse_option); 6846 tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); 6847 tcase_add_test(tc_basic, test_varying_buffer_fills); 6848 } 6849