1 /* Tests in the "basic" test case for the Expat test suite 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> 14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 15 Copyright (c) 2017 Joe Orton <jorton@redhat.com> 16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com> 20 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 22 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com> 23 Licensed under the MIT license: 24 25 Permission is hereby granted, free of charge, to any person obtaining 26 a copy of this software and associated documentation files (the 27 "Software"), to deal in the Software without restriction, including 28 without limitation the rights to use, copy, modify, merge, publish, 29 distribute, sublicense, and/or sell copies of the Software, and to permit 30 persons to whom the Software is furnished to do so, subject to the 31 following conditions: 32 33 The above copyright notice and this permission notice shall be included 34 in all copies or substantial portions of the Software. 35 36 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 37 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 38 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN 39 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 40 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 41 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 42 USE OR OTHER DEALINGS IN THE SOFTWARE. 43 */ 44 45 #if defined(NDEBUG) 46 # undef NDEBUG /* because test suite relies on assert(...) at the moment */ 47 #endif 48 49 #include <assert.h> 50 51 #include <stdio.h> 52 #include <string.h> 53 #include <time.h> 54 55 #if ! defined(__cplusplus) 56 # include <stdbool.h> 57 #endif 58 59 #include "expat_config.h" 60 61 #include "expat.h" 62 #include "internal.h" 63 #include "minicheck.h" 64 #include "structdata.h" 65 #include "common.h" 66 #include "dummy.h" 67 #include "handlers.h" 68 #include "siphash.h" 69 #include "basic_tests.h" 70 71 static void 72 basic_setup(void) { 73 g_parser = XML_ParserCreate(NULL); 74 if (g_parser == NULL) 75 fail("Parser not created."); 76 } 77 78 /* 79 * Character & encoding tests. 80 */ 81 82 START_TEST(test_nul_byte) { 83 char text[] = "<doc>\0</doc>"; 84 85 /* test that a NUL byte (in US-ASCII data) is an error */ 86 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 87 == XML_STATUS_OK) 88 fail("Parser did not report error on NUL-byte."); 89 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 90 xml_failure(g_parser); 91 } 92 END_TEST 93 94 START_TEST(test_u0000_char) { 95 /* test that a NUL byte (in US-ASCII data) is an error */ 96 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF, 97 "Parser did not report error on NUL-byte."); 98 } 99 END_TEST 100 101 START_TEST(test_siphash_self) { 102 if (! 
sip24_valid()) 103 fail("SipHash self-test failed"); 104 } 105 END_TEST 106 107 START_TEST(test_siphash_spec) { 108 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */ 109 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 110 "\x0a\x0b\x0c\x0d\x0e"; 111 const size_t len = sizeof(message) - 1; 112 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U); 113 struct siphash state; 114 struct sipkey key; 115 116 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 117 "\x0a\x0b\x0c\x0d\x0e\x0f"); 118 sip24_init(&state, &key); 119 120 /* Cover spread across calls */ 121 sip24_update(&state, message, 4); 122 sip24_update(&state, message + 4, len - 4); 123 124 /* Cover null length */ 125 sip24_update(&state, message, 0); 126 127 if (sip24_final(&state) != expected) 128 fail("sip24_final failed spec test\n"); 129 130 /* Cover wrapper */ 131 if (siphash24(message, len, &key) != expected) 132 fail("siphash24 failed spec test\n"); 133 } 134 END_TEST 135 136 START_TEST(test_bom_utf8) { 137 /* This test is really just making sure we don't core on a UTF-8 BOM. */ 138 const char *text = "\357\273\277<e/>"; 139 140 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 141 == XML_STATUS_ERROR) 142 xml_failure(g_parser); 143 } 144 END_TEST 145 146 START_TEST(test_bom_utf16_be) { 147 char text[] = "\376\377\0<\0e\0/\0>"; 148 149 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 150 == XML_STATUS_ERROR) 151 xml_failure(g_parser); 152 } 153 END_TEST 154 155 START_TEST(test_bom_utf16_le) { 156 char text[] = "\377\376<\0e\0/\0>\0"; 157 158 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 159 == XML_STATUS_ERROR) 160 xml_failure(g_parser); 161 } 162 END_TEST 163 164 START_TEST(test_nobom_utf16_le) { 165 char text[] = " \0<\0e\0/\0>\0"; 166 167 if (g_chunkSize == 1) { 168 // TODO: with just the first byte, we can't tell the difference between 169 // UTF-16-LE and UTF-8. 
Avoid the failure for now. 170 return; 171 } 172 173 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 174 == XML_STATUS_ERROR) 175 xml_failure(g_parser); 176 } 177 END_TEST 178 179 START_TEST(test_hash_collision) { 180 /* For full coverage of the lookup routine, we need to ensure a 181 * hash collision even though we can only tell that we have one 182 * through breakpoint debugging or coverage statistics. The 183 * following will cause a hash collision on machines with a 64-bit 184 * long type; others will have to experiment. The full coverage 185 * tests invoked from qa.sh usually provide a hash collision, but 186 * not always. This is an attempt to provide insurance. 187 */ 188 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U) 189 const char *text 190 = "<doc>\n" 191 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n" 192 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n" 193 "<b5></b5><b6></b6><b7></b7><b8></b8>\n" 194 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n" 195 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n" 196 "<d8>This triggers the table growth and collides with b2</d8>\n" 197 "</doc>\n"; 198 199 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT); 200 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 201 == XML_STATUS_ERROR) 202 xml_failure(g_parser); 203 } 204 END_TEST 205 #undef COLLIDING_HASH_SALT 206 207 /* Regression test for SF bug #491986. */ 208 START_TEST(test_danish_latin1) { 209 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 210 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>"; 211 #ifdef XML_UNICODE 212 const XML_Char *expected 213 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5"); 214 #else 215 const XML_Char *expected 216 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); 217 #endif 218 run_character_check(text, expected); 219 } 220 END_TEST 221 222 /* Regression test for SF bug #514281. 
*/ 223 START_TEST(test_french_charref_hexidecimal) { 224 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 225 "<doc>éèàçêÈ</doc>"; 226 #ifdef XML_UNICODE 227 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 228 #else 229 const XML_Char *expected 230 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 231 #endif 232 run_character_check(text, expected); 233 } 234 END_TEST 235 236 START_TEST(test_french_charref_decimal) { 237 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 238 "<doc>éèàçêÈ</doc>"; 239 #ifdef XML_UNICODE 240 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 241 #else 242 const XML_Char *expected 243 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 244 #endif 245 run_character_check(text, expected); 246 } 247 END_TEST 248 249 START_TEST(test_french_latin1) { 250 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 251 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>"; 252 #ifdef XML_UNICODE 253 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 254 #else 255 const XML_Char *expected 256 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 257 #endif 258 run_character_check(text, expected); 259 } 260 END_TEST 261 262 START_TEST(test_french_utf8) { 263 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 264 "<doc>\xC3\xA9</doc>"; 265 #ifdef XML_UNICODE 266 const XML_Char *expected = XCS("\x00e9"); 267 #else 268 const XML_Char *expected = XCS("\xC3\xA9"); 269 #endif 270 run_character_check(text, expected); 271 } 272 END_TEST 273 274 /* Regression test for SF bug #600479. 275 XXX There should be a test that exercises all legal XML Unicode 276 characters as PCDATA and attribute value content, and XML Name 277 characters as part of element and attribute names. 
278 */ 279 START_TEST(test_utf8_false_rejection) { 280 const char *text = "<doc>\xEF\xBA\xBF</doc>"; 281 #ifdef XML_UNICODE 282 const XML_Char *expected = XCS("\xfebf"); 283 #else 284 const XML_Char *expected = XCS("\xEF\xBA\xBF"); 285 #endif 286 run_character_check(text, expected); 287 } 288 END_TEST 289 290 /* Regression test for SF bug #477667. 291 This test assures that any 8-bit character followed by a 7-bit 292 character will not be mistakenly interpreted as a valid UTF-8 293 sequence. 294 */ 295 START_TEST(test_illegal_utf8) { 296 char text[100]; 297 int i; 298 299 for (i = 128; i <= 255; ++i) { 300 snprintf(text, sizeof(text), "<e>%ccd</e>", i); 301 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 302 == XML_STATUS_OK) { 303 snprintf(text, sizeof(text), 304 "expected token error for '%c' (ordinal %d) in UTF-8 text", i, 305 i); 306 fail(text); 307 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 308 xml_failure(g_parser); 309 /* Reset the parser since we use the same parser repeatedly. 
*/ 310 XML_ParserReset(g_parser, NULL); 311 } 312 } 313 END_TEST 314 315 /* Examples, not masks: */ 316 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ 317 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ 318 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */ 319 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ 320 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */ 321 322 START_TEST(test_utf8_auto_align) { 323 struct TestCase { 324 ptrdiff_t expectedMovementInChars; 325 const char *input; 326 }; 327 328 struct TestCase cases[] = { 329 {00, ""}, 330 331 {00, UTF8_LEAD_1}, 332 333 {-1, UTF8_LEAD_2}, 334 {00, UTF8_LEAD_2 UTF8_FOLLOW}, 335 336 {-1, UTF8_LEAD_3}, 337 {-2, UTF8_LEAD_3 UTF8_FOLLOW}, 338 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, 339 340 {-1, UTF8_LEAD_4}, 341 {-2, UTF8_LEAD_4 UTF8_FOLLOW}, 342 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, 343 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, 344 }; 345 346 size_t i = 0; 347 bool success = true; 348 for (; i < sizeof(cases) / sizeof(*cases); i++) { 349 const char *fromLim = cases[i].input + strlen(cases[i].input); 350 const char *const fromLimInitially = fromLim; 351 ptrdiff_t actualMovementInChars; 352 353 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim); 354 355 actualMovementInChars = (fromLim - fromLimInitially); 356 if (actualMovementInChars != cases[i].expectedMovementInChars) { 357 size_t j = 0; 358 success = false; 359 printf("[-] UTF-8 case %2u: Expected movement by %2d chars" 360 ", actually moved by %2d chars: \"", 361 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars, 362 (int)actualMovementInChars); 363 for (; j < strlen(cases[i].input); j++) { 364 printf("\\x%02x", (unsigned char)cases[i].input[j]); 365 } 366 printf("\"\n"); 367 } 368 } 369 370 if (! 
success) { 371 fail("UTF-8 auto-alignment is not bullet-proof\n"); 372 } 373 } 374 END_TEST 375 376 START_TEST(test_utf16) { 377 /* <?xml version="1.0" encoding="UTF-16"?> 378 * <doc a='123'>some {A} text</doc> 379 * 380 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A 381 */ 382 char text[] 383 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" 384 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" 385 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" 386 "\000'\000?\000>\000\n" 387 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" 388 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" 389 "<\000/\000d\000o\000c\000>"; 390 #ifdef XML_UNICODE 391 const XML_Char *expected = XCS("some \xff21 text"); 392 #else 393 const XML_Char *expected = XCS("some \357\274\241 text"); 394 #endif 395 CharData storage; 396 397 CharData_Init(&storage); 398 XML_SetUserData(g_parser, &storage); 399 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 400 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 401 == XML_STATUS_ERROR) 402 xml_failure(g_parser); 403 CharData_CheckXMLChars(&storage, expected); 404 } 405 END_TEST 406 407 START_TEST(test_utf16_le_epilog_newline) { 408 unsigned int first_chunk_bytes = 17; 409 char text[] = "\xFF\xFE" /* BOM */ 410 "<\000e\000/\000>\000" /* document element */ 411 "\r\000\n\000\r\000\n\000"; /* epilog */ 412 413 if (first_chunk_bytes >= sizeof(text) - 1) 414 fail("bad value of first_chunk_bytes"); 415 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE) 416 == XML_STATUS_ERROR) 417 xml_failure(g_parser); 418 else { 419 enum XML_Status rc; 420 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes, 421 (int)(sizeof(text) - first_chunk_bytes - 1), 422 XML_TRUE); 423 if (rc == XML_STATUS_ERROR) 424 xml_failure(g_parser); 425 } 426 } 427 END_TEST 428 429 /* Test that an outright lie in the encoding is 
faulted */ 430 START_TEST(test_not_utf16) { 431 const char *text = "<?xml version='1.0' encoding='utf-16'?>" 432 "<doc>Hi</doc>"; 433 434 /* Use a handler to provoke the appropriate code paths */ 435 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler); 436 expect_failure(text, XML_ERROR_INCORRECT_ENCODING, 437 "UTF-16 declared in UTF-8 not faulted"); 438 } 439 END_TEST 440 441 /* Test that an unknown encoding is rejected */ 442 START_TEST(test_bad_encoding) { 443 const char *text = "<doc>Hi</doc>"; 444 445 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding"))) 446 fail("XML_SetEncoding failed"); 447 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 448 "Unknown encoding not faulted"); 449 } 450 END_TEST 451 452 /* Regression test for SF bug #481609, #774028. */ 453 START_TEST(test_latin1_umlauts) { 454 const char *text 455 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 456 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n" 457 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>"; 458 #ifdef XML_UNICODE 459 /* Expected results in UTF-16 */ 460 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ") 461 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >"); 462 #else 463 /* Expected results in UTF-8 */ 464 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") 465 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >"); 466 #endif 467 468 run_character_check(text, expected); 469 XML_ParserReset(g_parser, NULL); 470 run_attribute_check(text, expected); 471 /* Repeat with a default handler */ 472 XML_ParserReset(g_parser, NULL); 473 XML_SetDefaultHandler(g_parser, dummy_default_handler); 474 run_character_check(text, expected); 475 XML_ParserReset(g_parser, NULL); 476 XML_SetDefaultHandler(g_parser, dummy_default_handler); 477 run_attribute_check(text, expected); 478 } 479 END_TEST 480 481 /* Test that an element name with a 4-byte UTF-8 character is rejected */ 482 START_TEST(test_long_utf8_character) { 483 const char *text 484 = "<?xml version='1.0' 
encoding='utf-8'?>\n" 485 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ 486 "<do\xf0\x90\x80\x80/>"; 487 expect_failure(text, XML_ERROR_INVALID_TOKEN, 488 "4-byte UTF-8 character in element name not faulted"); 489 } 490 END_TEST 491 492 /* Test that a long latin-1 attribute (too long to convert in one go) 493 * is correctly converted 494 */ 495 START_TEST(test_long_latin1_attribute) { 496 const char *text 497 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 498 "<doc att='" 499 /* 64 characters per line */ 500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 515 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" 516 /* Last character splits across a buffer boundary */ 517 "\xe4'>\n</doc>"; 518 519 const XML_Char *expected = 520 /* 64 characters per line */ 521 /* clang-format off */ 522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 523 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 524 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 537 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO") 538 /* clang-format on */ 539 #ifdef XML_UNICODE 540 XCS("\x00e4"); 541 #else 542 XCS("\xc3\xa4"); 543 #endif 544 545 run_attribute_check(text, expected); 546 } 547 END_TEST 548 549 /* Test that a long ASCII attribute (too long to convert in one go) 550 * is correctly converted 551 */ 552 START_TEST(test_long_ascii_attribute) { 553 const char *text 554 = "<?xml version='1.0' encoding='us-ascii'?>\n" 555 "<doc att='" 556 /* 64 characters per line */ 557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 562 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 563 
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 572 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 573 "01234'>\n</doc>"; 574 const XML_Char *expected = 575 /* 64 characters per line */ 576 /* clang-format off */ 577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 592 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 593 XCS("01234"); 594 /* clang-format on */ 595 596 run_attribute_check(text, expected); 597 } 598 END_TEST 599 600 /* Regression test #1 for SF bug #653180. */ 601 START_TEST(test_line_number_after_parse) { 602 const char *text = "<tag>\n" 603 "\n" 604 "\n</tag>"; 605 XML_Size lineno; 606 607 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 608 == XML_STATUS_ERROR) 609 xml_failure(g_parser); 610 lineno = XML_GetCurrentLineNumber(g_parser); 611 if (lineno != 4) { 612 char buffer[100]; 613 snprintf(buffer, sizeof(buffer), 614 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); 615 fail(buffer); 616 } 617 } 618 END_TEST 619 620 /* Regression test #2 for SF bug #653180. */ 621 START_TEST(test_column_number_after_parse) { 622 const char *text = "<tag></tag>"; 623 XML_Size colno; 624 625 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 626 == XML_STATUS_ERROR) 627 xml_failure(g_parser); 628 colno = XML_GetCurrentColumnNumber(g_parser); 629 if (colno != 11) { 630 char buffer[100]; 631 snprintf(buffer, sizeof(buffer), 632 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); 633 fail(buffer); 634 } 635 } 636 END_TEST 637 638 /* Regression test #3 for SF bug #653180. 
*/ 639 START_TEST(test_line_and_column_numbers_inside_handlers) { 640 const char *text = "<a>\n" /* Unix end-of-line */ 641 " <b>\r\n" /* Windows end-of-line */ 642 " <c/>\r" /* Mac OS end-of-line */ 643 " </b>\n" 644 " <d>\n" 645 " <f/>\n" 646 " </d>\n" 647 "</a>"; 648 const StructDataEntry expected[] 649 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG}, 650 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG}, 651 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG}, 652 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG}, 653 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}}; 654 const int expected_count = sizeof(expected) / sizeof(StructDataEntry); 655 StructData storage; 656 657 StructData_Init(&storage); 658 XML_SetUserData(g_parser, &storage); 659 XML_SetStartElementHandler(g_parser, start_element_event_handler2); 660 XML_SetEndElementHandler(g_parser, end_element_event_handler2); 661 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 662 == XML_STATUS_ERROR) 663 xml_failure(g_parser); 664 665 StructData_CheckItems(&storage, expected, expected_count); 666 StructData_Dispose(&storage); 667 } 668 END_TEST 669 670 /* Regression test #4 for SF bug #653180. */ 671 START_TEST(test_line_number_after_error) { 672 const char *text = "<a>\n" 673 " <b>\n" 674 " </a>"; /* missing </b> */ 675 XML_Size lineno; 676 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 677 != XML_STATUS_ERROR) 678 fail("Expected a parse error"); 679 680 lineno = XML_GetCurrentLineNumber(g_parser); 681 if (lineno != 3) { 682 char buffer[100]; 683 snprintf(buffer, sizeof(buffer), 684 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); 685 fail(buffer); 686 } 687 } 688 END_TEST 689 690 /* Regression test #5 for SF bug #653180. 
*/ 691 START_TEST(test_column_number_after_error) { 692 const char *text = "<a>\n" 693 " <b>\n" 694 " </a>"; /* missing </b> */ 695 XML_Size colno; 696 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 697 != XML_STATUS_ERROR) 698 fail("Expected a parse error"); 699 700 colno = XML_GetCurrentColumnNumber(g_parser); 701 if (colno != 4) { 702 char buffer[100]; 703 snprintf(buffer, sizeof(buffer), 704 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); 705 fail(buffer); 706 } 707 } 708 END_TEST 709 710 /* Regression test for SF bug #478332. */ 711 START_TEST(test_really_long_lines) { 712 /* This parses an input line longer than INIT_DATA_BUF_SIZE 713 characters long (defined to be 1024 in xmlparse.c). We take a 714 really cheesy approach to building the input buffer, because 715 this avoids writing bugs in buffer-filling code. 716 */ 717 const char *text 718 = "<e>" 719 /* 64 chars */ 720 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 721 /* until we have at least 1024 characters on the line: */ 722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 734 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 735 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 737 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 738 "</e>"; 739 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 740 == XML_STATUS_ERROR) 741 xml_failure(g_parser); 742 } 743 END_TEST 744 745 /* Test cdata processing across a buffer boundary */ 746 START_TEST(test_really_long_encoded_lines) { 747 /* As above, except that we want to provoke an output buffer 748 * overflow with a non-trivial encoding. For this we need to pass 749 * the whole cdata in one go, not byte-by-byte. 750 */ 751 void *buffer; 752 const char *text 753 = "<?xml version='1.0' encoding='iso-8859-1'?>" 754 "<e>" 755 /* 64 chars */ 756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 757 /* until we have at least 1024 characters on the line: */ 758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 772 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 773 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 774 "</e>"; 775 int parse_len = (int)strlen(text); 776 777 /* Need a cdata handler to provoke the code path we want to test */ 778 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler); 779 buffer = XML_GetBuffer(g_parser, parse_len); 780 if (buffer == NULL) 781 fail("Could not allocate parse buffer"); 782 assert(buffer != NULL); 783 memcpy(buffer, text, parse_len); 784 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) 785 xml_failure(g_parser); 786 } 787 END_TEST 788 789 /* 790 * Element event tests. 791 */ 792 793 START_TEST(test_end_element_events) { 794 const char *text = "<a><b><c/></b><d><f/></d></a>"; 795 const XML_Char *expected = XCS("/c/b/f/d/a"); 796 CharData storage; 797 798 CharData_Init(&storage); 799 XML_SetUserData(g_parser, &storage); 800 XML_SetEndElementHandler(g_parser, end_element_event_handler); 801 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 802 == XML_STATUS_ERROR) 803 xml_failure(g_parser); 804 CharData_CheckXMLChars(&storage, expected); 805 } 806 END_TEST 807 808 /* 809 * Attribute tests. 810 */ 811 812 /* Helper used by the following tests; this checks any "attr" and "refs" 813 attributes to make sure whitespace has been normalized. 814 815 Return true if whitespace has been normalized in a string, using 816 the rules for attribute value normalization. The 'is_cdata' flag 817 is needed since CDATA attributes don't need to have multiple 818 whitespace characters collapsed to a single space, while other 819 attribute data types do. (Section 3.3.3 of the recommendation.) 820 */ 821 static int 822 is_whitespace_normalized(const XML_Char *s, int is_cdata) { 823 int blanks = 0; 824 int at_start = 1; 825 while (*s) { 826 if (*s == XCS(' ')) 827 ++blanks; 828 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r')) 829 return 0; 830 else { 831 if (at_start) { 832 at_start = 0; 833 if (blanks && ! 
is_cdata) 834 /* illegal leading blanks */ 835 return 0; 836 } else if (blanks > 1 && ! is_cdata) 837 return 0; 838 blanks = 0; 839 } 840 ++s; 841 } 842 if (blanks && ! is_cdata) 843 return 0; 844 return 1; 845 } 846 847 /* Check the attribute whitespace checker: */ 848 START_TEST(test_helper_is_whitespace_normalized) { 849 assert(is_whitespace_normalized(XCS("abc"), 0)); 850 assert(is_whitespace_normalized(XCS("abc"), 1)); 851 assert(is_whitespace_normalized(XCS("abc def ghi"), 0)); 852 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 853 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0)); 854 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1)); 855 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0)); 856 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 857 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0)); 858 assert(is_whitespace_normalized(XCS("abc def ghi "), 1)); 859 assert(! is_whitespace_normalized(XCS(" "), 0)); 860 assert(is_whitespace_normalized(XCS(" "), 1)); 861 assert(! is_whitespace_normalized(XCS("\t"), 0)); 862 assert(! is_whitespace_normalized(XCS("\t"), 1)); 863 assert(! is_whitespace_normalized(XCS("\n"), 0)); 864 assert(! is_whitespace_normalized(XCS("\n"), 1)); 865 assert(! is_whitespace_normalized(XCS("\r"), 0)); 866 assert(! is_whitespace_normalized(XCS("\r"), 1)); 867 assert(! is_whitespace_normalized(XCS("abc\t def"), 1)); 868 } 869 END_TEST 870 871 static void XMLCALL 872 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, 873 const XML_Char **atts) { 874 int i; 875 UNUSED_P(userData); 876 UNUSED_P(name); 877 for (i = 0; atts[i] != NULL; i += 2) { 878 const XML_Char *attrname = atts[i]; 879 const XML_Char *value = atts[i + 1]; 880 if (xcstrcmp(XCS("attr"), attrname) == 0 881 || xcstrcmp(XCS("ents"), attrname) == 0 882 || xcstrcmp(XCS("refs"), attrname) == 0) { 883 if (! 
is_whitespace_normalized(value, 0)) { 884 char buffer[256]; 885 snprintf(buffer, sizeof(buffer), 886 "attribute value not normalized: %" XML_FMT_STR 887 "='%" XML_FMT_STR "'", 888 attrname, value); 889 fail(buffer); 890 } 891 } 892 } 893 } 894 895 START_TEST(test_attr_whitespace_normalization) { 896 const char *text 897 = "<!DOCTYPE doc [\n" 898 " <!ATTLIST doc\n" 899 " attr NMTOKENS #REQUIRED\n" 900 " ents ENTITIES #REQUIRED\n" 901 " refs IDREFS #REQUIRED>\n" 902 "]>\n" 903 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n" 904 " ents=' ent-1 \t\r\n" 905 " ent-2 ' >\n" 906 " <e id='id-1'/>\n" 907 " <e id='id-2'/>\n" 908 "</doc>"; 909 910 XML_SetStartElementHandler(g_parser, 911 check_attr_contains_normalized_whitespace); 912 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 913 == XML_STATUS_ERROR) 914 xml_failure(g_parser); 915 } 916 END_TEST 917 918 /* 919 * XML declaration tests. 920 */ 921 922 START_TEST(test_xmldecl_misplaced) { 923 expect_failure("\n" 924 "<?xml version='1.0'?>\n" 925 "<a/>", 926 XML_ERROR_MISPLACED_XML_PI, 927 "failed to report misplaced XML declaration"); 928 } 929 END_TEST 930 931 START_TEST(test_xmldecl_invalid) { 932 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL, 933 "Failed to report invalid XML declaration"); 934 } 935 END_TEST 936 937 START_TEST(test_xmldecl_missing_attr) { 938 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL, 939 "Failed to report missing XML declaration attribute"); 940 } 941 END_TEST 942 943 START_TEST(test_xmldecl_missing_value) { 944 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n" 945 "<doc/>", 946 XML_ERROR_XML_DECL, 947 "Failed to report missing attribute value"); 948 } 949 END_TEST 950 951 /* Regression test for SF bug #584832. 
*/ 952 START_TEST(test_unknown_encoding_internal_entity) { 953 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 954 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 955 "<test a='&foo;'/>"; 956 957 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL); 958 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 959 == XML_STATUS_ERROR) 960 xml_failure(g_parser); 961 } 962 END_TEST 963 964 /* Test unrecognised encoding handler */ 965 START_TEST(test_unrecognised_encoding_internal_entity) { 966 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 967 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 968 "<test a='&foo;'/>"; 969 970 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL); 971 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 972 != XML_STATUS_ERROR) 973 fail("Unrecognised encoding not rejected"); 974 } 975 END_TEST 976 977 /* Regression test for SF bug #620106. */ 978 START_TEST(test_ext_entity_set_encoding) { 979 const char *text = "<!DOCTYPE doc [\n" 980 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 981 "]>\n" 982 "<doc>&en;</doc>"; 983 ExtTest test_data 984 = {/* This text says it's an unsupported encoding, but it's really 985 UTF-8, which we tell Expat using XML_SetEncoding(). 
986 */ 987 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL}; 988 #ifdef XML_UNICODE 989 const XML_Char *expected = XCS("\x00e9"); 990 #else 991 const XML_Char *expected = XCS("\xc3\xa9"); 992 #endif 993 994 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 995 run_ext_character_check(text, &test_data, expected); 996 } 997 END_TEST 998 999 /* Test external entities with no handler */ 1000 START_TEST(test_ext_entity_no_handler) { 1001 const char *text = "<!DOCTYPE doc [\n" 1002 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1003 "]>\n" 1004 "<doc>&en;</doc>"; 1005 1006 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1007 run_character_check(text, XCS("")); 1008 } 1009 END_TEST 1010 1011 /* Test UTF-8 BOM is accepted */ 1012 START_TEST(test_ext_entity_set_bom) { 1013 const char *text = "<!DOCTYPE doc [\n" 1014 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1015 "]>\n" 1016 "<doc>&en;</doc>"; 1017 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */ 1018 "<?xml encoding='iso-8859-3'?>" 1019 "\xC3\xA9", 1020 XCS("utf-8"), NULL}; 1021 #ifdef XML_UNICODE 1022 const XML_Char *expected = XCS("\x00e9"); 1023 #else 1024 const XML_Char *expected = XCS("\xc3\xa9"); 1025 #endif 1026 1027 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1028 run_ext_character_check(text, &test_data, expected); 1029 } 1030 END_TEST 1031 1032 /* Test that bad encodings are faulted */ 1033 START_TEST(test_ext_entity_bad_encoding) { 1034 const char *text = "<!DOCTYPE doc [\n" 1035 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1036 "]>\n" 1037 "<doc>&en;</doc>"; 1038 ExtFaults fault 1039 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted", 1040 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING}; 1041 1042 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1043 XML_SetUserData(g_parser, &fault); 1044 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1045 "Bad encoding should not have been 
accepted"); 1046 } 1047 END_TEST 1048 1049 /* Try handing an invalid encoding to an external entity parser */ 1050 START_TEST(test_ext_entity_bad_encoding_2) { 1051 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1052 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1053 "<doc>&entity;</doc>"; 1054 ExtFaults fault 1055 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted", 1056 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING}; 1057 1058 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1059 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1060 XML_SetUserData(g_parser, &fault); 1061 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1062 "Bad encoding not faulted in external entity handler"); 1063 } 1064 END_TEST 1065 1066 /* Test that no error is reported for unknown entities if we don't 1067 read an external subset. This was fixed in Expat 1.95.5. 1068 */ 1069 START_TEST(test_wfc_undeclared_entity_unread_external_subset) { 1070 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 1071 "<doc>&entity;</doc>"; 1072 1073 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1074 == XML_STATUS_ERROR) 1075 xml_failure(g_parser); 1076 } 1077 END_TEST 1078 1079 /* Test that an error is reported for unknown entities if we don't 1080 have an external subset. 1081 */ 1082 START_TEST(test_wfc_undeclared_entity_no_external_subset) { 1083 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY, 1084 "Parser did not report undefined entity w/out a DTD."); 1085 } 1086 END_TEST 1087 1088 /* Test that an error is reported for unknown entities if we don't 1089 read an external subset, but have been declared standalone. 
1090 */ 1091 START_TEST(test_wfc_undeclared_entity_standalone) { 1092 const char *text 1093 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1094 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1095 "<doc>&entity;</doc>"; 1096 1097 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1098 "Parser did not report undefined entity (standalone)."); 1099 } 1100 END_TEST 1101 1102 /* Test that an error is reported for unknown entities if we have read 1103 an external subset, and standalone is true. 1104 */ 1105 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { 1106 const char *text 1107 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1108 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1109 "<doc>&entity;</doc>"; 1110 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1111 1112 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1113 XML_SetUserData(g_parser, &test_data); 1114 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1115 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1116 "Parser did not report undefined entity (external DTD)."); 1117 } 1118 END_TEST 1119 1120 /* Test that external entity handling is not done if the parsing flag 1121 * is set to UNLESS_STANDALONE 1122 */ 1123 START_TEST(test_entity_with_external_subset_unless_standalone) { 1124 const char *text 1125 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1126 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1127 "<doc>&entity;</doc>"; 1128 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL}; 1129 1130 XML_SetParamEntityParsing(g_parser, 1131 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1132 XML_SetUserData(g_parser, &test_data); 1133 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1134 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1135 "Parser did not report undefined entity"); 1136 } 1137 END_TEST 1138 1139 /* Test that no error is reported for unknown entities if we have read 1140 an external subset, and 
standalone is false. 1141 */ 1142 START_TEST(test_wfc_undeclared_entity_with_external_subset) { 1143 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1144 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1145 "<doc>&entity;</doc>"; 1146 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1147 1148 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1149 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1150 run_ext_character_check(text, &test_data, XCS("")); 1151 } 1152 END_TEST 1153 1154 /* Test that an error is reported if our NotStandalone handler fails */ 1155 START_TEST(test_not_standalone_handler_reject) { 1156 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1157 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1158 "<doc>&entity;</doc>"; 1159 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1160 1161 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1162 XML_SetUserData(g_parser, &test_data); 1163 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1164 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1165 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1166 "NotStandalone handler failed to reject"); 1167 1168 /* Try again but without external entity handling */ 1169 XML_ParserReset(g_parser, NULL); 1170 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1171 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1172 "NotStandalone handler failed to reject"); 1173 } 1174 END_TEST 1175 1176 /* Test that no error is reported if our NotStandalone handler succeeds */ 1177 START_TEST(test_not_standalone_handler_accept) { 1178 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1179 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1180 "<doc>&entity;</doc>"; 1181 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1182 1183 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1184 
XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1185 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1186 run_ext_character_check(text, &test_data, XCS("")); 1187 1188 /* Repeat without the external entity handler */ 1189 XML_ParserReset(g_parser, NULL); 1190 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1191 run_character_check(text, XCS("")); 1192 } 1193 END_TEST 1194 1195 START_TEST(test_entity_start_tag_level_greater_than_one) { 1196 const char *const text = "<!DOCTYPE t1 [\n" 1197 " <!ENTITY e1 'hello'>\n" 1198 "]>\n" 1199 "<t1>\n" 1200 " <t2>&e1;</t2>\n" 1201 "</t1>\n"; 1202 1203 XML_Parser parser = XML_ParserCreate(NULL); 1204 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 1205 /*isFinal*/ XML_TRUE) 1206 == XML_STATUS_OK); 1207 XML_ParserFree(parser); 1208 } 1209 END_TEST 1210 1211 START_TEST(test_wfc_no_recursive_entity_refs) { 1212 const char *text = "<!DOCTYPE doc [\n" 1213 " <!ENTITY entity '&entity;'>\n" 1214 "]>\n" 1215 "<doc>&entity;</doc>"; 1216 1217 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, 1218 "Parser did not report recursive entity reference."); 1219 } 1220 END_TEST 1221 1222 START_TEST(test_no_indirectly_recursive_entity_refs) { 1223 struct TestCase { 1224 const char *doc; 1225 bool usesParameterEntities; 1226 }; 1227 1228 const struct TestCase cases[] = { 1229 // general entity + character data 1230 {"<!DOCTYPE a [\n" 1231 " <!ENTITY e1 '&e2;'>\n" 1232 " <!ENTITY e2 '&e1;'>\n" 1233 "]><a>&e2;</a>\n", 1234 false}, 1235 1236 // general entity + attribute value 1237 {"<!DOCTYPE a [\n" 1238 " <!ENTITY e1 '&e2;'>\n" 1239 " <!ENTITY e2 '&e1;'>\n" 1240 "]><a k1='&e2;' />\n", 1241 false}, 1242 1243 // parameter entity 1244 {"<!DOCTYPE doc [\n" 1245 " <!ENTITY % p1 '%p2;'>\n" 1246 " <!ENTITY % p2 '%p1;'>\n" 1247 " <!ENTITY % define_g \"<!ENTITY g '%p2;'>\">\n" 1248 " %define_g;\n" 1249 "]>\n" 1250 "<doc/>\n", 1251 true}, 1252 }; 1253 const XML_Bool 
reset_or_not[] = {XML_TRUE, XML_FALSE}; 1254 1255 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1256 for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]); 1257 j++) { 1258 const XML_Bool reset_wanted = reset_or_not[j]; 1259 const char *const doc = cases[i].doc; 1260 const bool usesParameterEntities = cases[i].usesParameterEntities; 1261 1262 set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc); 1263 1264 #ifdef XML_DTD // both GE and DTD 1265 const bool rejection_expected = true; 1266 #elif XML_GE == 1 // GE but not DTD 1267 const bool rejection_expected = ! usesParameterEntities; 1268 #else // neither DTD nor GE 1269 const bool rejection_expected = false; 1270 #endif 1271 1272 XML_Parser parser = XML_ParserCreate(NULL); 1273 1274 #ifdef XML_DTD 1275 if (usesParameterEntities) { 1276 assert_true( 1277 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS) 1278 == 1); 1279 } 1280 #else 1281 UNUSED_P(usesParameterEntities); 1282 #endif // XML_DTD 1283 1284 const enum XML_Status status 1285 = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), 1286 /*isFinal*/ XML_TRUE); 1287 1288 if (rejection_expected) { 1289 assert_true(status == XML_STATUS_ERROR); 1290 assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF); 1291 } else { 1292 assert_true(status == XML_STATUS_OK); 1293 } 1294 1295 if (reset_wanted) { 1296 // This covers free'ing of (eventually) all three open entity lists by 1297 // XML_ParserReset. 1298 XML_ParserReset(parser, NULL); 1299 } 1300 1301 // This covers free'ing of (eventually) all three open entity lists by 1302 // XML_ParserFree (unless XML_ParserReset has already done that above). 
1303 XML_ParserFree(parser); 1304 } 1305 } 1306 } 1307 END_TEST 1308 1309 START_TEST(test_recursive_external_parameter_entity_2) { 1310 struct TestCase { 1311 const char *doc; 1312 enum XML_Status expectedStatus; 1313 }; 1314 1315 struct TestCase cases[] = { 1316 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR}, 1317 {"<!ENTITY % p1 '%p1;'>" 1318 "<!ENTITY % p1 'first declaration wins'>", 1319 XML_STATUS_ERROR}, 1320 {"<!ENTITY % p1 'first declaration wins'>" 1321 "<!ENTITY % p1 '%p1;'>", 1322 XML_STATUS_OK}, 1323 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK}, 1324 }; 1325 1326 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1327 const char *const doc = cases[i].doc; 1328 const enum XML_Status expectedStatus = cases[i].expectedStatus; 1329 set_subtest("%s", doc); 1330 1331 XML_Parser parser = XML_ParserCreate(NULL); 1332 assert_true(parser != NULL); 1333 1334 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 1335 assert_true(ext_parser != NULL); 1336 1337 const enum XML_Status actualStatus 1338 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); 1339 1340 assert_true(actualStatus == expectedStatus); 1341 if (actualStatus != XML_STATUS_OK) { 1342 assert_true(XML_GetErrorCode(ext_parser) 1343 == XML_ERROR_RECURSIVE_ENTITY_REF); 1344 } 1345 1346 XML_ParserFree(ext_parser); 1347 XML_ParserFree(parser); 1348 } 1349 } 1350 END_TEST 1351 1352 /* Test incomplete external entities are faulted */ 1353 START_TEST(test_ext_entity_invalid_parse) { 1354 const char *text = "<!DOCTYPE doc [\n" 1355 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1356 "]>\n" 1357 "<doc>&en;</doc>"; 1358 const ExtFaults faults[] 1359 = {{"<", "Incomplete element declaration not faulted", NULL, 1360 XML_ERROR_UNCLOSED_TOKEN}, 1361 {"<\xe2\x82", /* First two bytes of a three-byte char */ 1362 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 1363 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL, 1364 
XML_ERROR_PARTIAL_CHAR}, 1365 {NULL, NULL, NULL, XML_ERROR_NONE}}; 1366 const ExtFaults *fault = faults; 1367 1368 for (; fault->parse_text != NULL; fault++) { 1369 set_subtest("\"%s\"", fault->parse_text); 1370 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1371 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1372 XML_SetUserData(g_parser, (void *)fault); 1373 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1374 "Parser did not report external entity error"); 1375 XML_ParserReset(g_parser, NULL); 1376 } 1377 } 1378 END_TEST 1379 1380 /* Regression test for SF bug #483514. */ 1381 START_TEST(test_dtd_default_handling) { 1382 const char *text = "<!DOCTYPE doc [\n" 1383 "<!ENTITY e SYSTEM 'http://example.org/e'>\n" 1384 "<!NOTATION n SYSTEM 'http://example.org/n'>\n" 1385 "<!ELEMENT doc EMPTY>\n" 1386 "<!ATTLIST doc a CDATA #IMPLIED>\n" 1387 "<?pi in dtd?>\n" 1388 "<!--comment in dtd-->\n" 1389 "]><doc/>"; 1390 1391 XML_SetDefaultHandler(g_parser, accumulate_characters); 1392 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 1393 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 1394 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 1395 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 1396 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 1397 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 1398 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); 1399 XML_SetCommentHandler(g_parser, dummy_comment_handler); 1400 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1401 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1402 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>")); 1403 } 1404 END_TEST 1405 1406 /* Test handling of attribute declarations */ 1407 START_TEST(test_dtd_attr_handling) { 1408 const char *prolog = "<!DOCTYPE doc [\n" 1409 "<!ELEMENT doc EMPTY>\n"; 
1410 AttTest attr_data[] 1411 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n" 1412 "]>" 1413 "<doc a='two'/>", 1414 XCS("doc"), XCS("a"), 1415 XCS("(one|two|three)"), /* Extraneous spaces will be removed */ 1416 NULL, XML_TRUE}, 1417 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n" 1418 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n" 1419 "]>" 1420 "<doc/>", 1421 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE}, 1422 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n" 1423 "]>" 1424 "<doc/>", 1425 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE}, 1426 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n" 1427 "]>" 1428 "<doc/>", 1429 XCS("doc"), XCS("a"), XCS("CDATA"), 1430 #ifdef XML_UNICODE 1431 XCS("\x06f2"), 1432 #else 1433 XCS("\xdb\xb2"), 1434 #endif 1435 XML_FALSE}, 1436 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}}; 1437 AttTest *test; 1438 1439 for (test = attr_data; test->definition != NULL; test++) { 1440 set_subtest("%s", test->definition); 1441 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); 1442 XML_SetUserData(g_parser, test); 1443 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), 1444 XML_FALSE) 1445 == XML_STATUS_ERROR) 1446 xml_failure(g_parser); 1447 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition, 1448 (int)strlen(test->definition), XML_TRUE) 1449 == XML_STATUS_ERROR) 1450 xml_failure(g_parser); 1451 XML_ParserReset(g_parser, NULL); 1452 } 1453 } 1454 END_TEST 1455 1456 /* See related SF bug #673791. 1457 When namespace processing is enabled, setting the namespace URI for 1458 a prefix is not allowed; this test ensures that it *is* allowed 1459 when namespace processing is not enabled. 1460 (See Namespaces in XML, section 2.) 
1461 */ 1462 START_TEST(test_empty_ns_without_namespaces) { 1463 const char *text = "<doc xmlns:prefix='http://example.org/'>\n" 1464 " <e xmlns:prefix=''/>\n" 1465 "</doc>"; 1466 1467 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1468 == XML_STATUS_ERROR) 1469 xml_failure(g_parser); 1470 } 1471 END_TEST 1472 1473 /* Regression test for SF bug #824420. 1474 Checks that an xmlns:prefix attribute set in an attribute's default 1475 value isn't misinterpreted. 1476 */ 1477 START_TEST(test_ns_in_attribute_default_without_namespaces) { 1478 const char *text = "<!DOCTYPE e:element [\n" 1479 " <!ATTLIST e:element\n" 1480 " xmlns:e CDATA 'http://example.org/'>\n" 1481 " ]>\n" 1482 "<e:element/>"; 1483 1484 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1485 == XML_STATUS_ERROR) 1486 xml_failure(g_parser); 1487 } 1488 END_TEST 1489 1490 /* Regression test for SF bug #1515266: missing check of stopped 1491 parser in doContext() 'for' loop. */ 1492 START_TEST(test_stop_parser_between_char_data_calls) { 1493 /* The sample data must be big enough that there are two calls to 1494 the character data handler from within the inner "for" loop of 1495 the XML_TOK_DATA_CHARS case in doContent(), and the character 1496 handler must stop the parser and clear the character data 1497 handler. 1498 */ 1499 const char *text = long_character_data_text; 1500 1501 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1502 g_resumable = XML_FALSE; 1503 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1504 != XML_STATUS_ERROR) 1505 xml_failure(g_parser); 1506 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 1507 xml_failure(g_parser); 1508 } 1509 END_TEST 1510 1511 /* Regression test for SF bug #1515266: missing check of stopped 1512 parser in doContext() 'for' loop. 
*/ 1513 START_TEST(test_suspend_parser_between_char_data_calls) { 1514 /* The sample data must be big enough that there are two calls to 1515 the character data handler from within the inner "for" loop of 1516 the XML_TOK_DATA_CHARS case in doContent(), and the character 1517 handler must stop the parser and clear the character data 1518 handler. 1519 */ 1520 const char *text = long_character_data_text; 1521 1522 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1523 g_resumable = XML_TRUE; 1524 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1525 // we won't know exactly how much input we actually managed to give Expat. 1526 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 1527 != XML_STATUS_SUSPENDED) 1528 xml_failure(g_parser); 1529 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1530 xml_failure(g_parser); 1531 /* Try parsing directly */ 1532 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1533 != XML_STATUS_ERROR) 1534 fail("Attempt to continue parse while suspended not faulted"); 1535 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 1536 fail("Suspended parse not faulted with correct error"); 1537 } 1538 END_TEST 1539 1540 /* Test repeated calls to XML_StopParser are handled correctly */ 1541 START_TEST(test_repeated_stop_parser_between_char_data_calls) { 1542 const char *text = long_character_data_text; 1543 1544 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1545 g_resumable = XML_FALSE; 1546 g_abortable = XML_FALSE; 1547 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1548 != XML_STATUS_ERROR) 1549 fail("Failed to double-stop parser"); 1550 1551 XML_ParserReset(g_parser, NULL); 1552 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1553 g_resumable = XML_TRUE; 1554 g_abortable = XML_FALSE; 1555 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1556 // we won't know 
exactly how much input we actually managed to give Expat. 1557 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 1558 != XML_STATUS_SUSPENDED) 1559 fail("Failed to double-suspend parser"); 1560 1561 XML_ParserReset(g_parser, NULL); 1562 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1563 g_resumable = XML_TRUE; 1564 g_abortable = XML_TRUE; 1565 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1566 != XML_STATUS_ERROR) 1567 fail("Failed to suspend-abort parser"); 1568 } 1569 END_TEST 1570 1571 START_TEST(test_good_cdata_ascii) { 1572 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>"; 1573 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>"); 1574 1575 CharData storage; 1576 CharData_Init(&storage); 1577 XML_SetUserData(g_parser, &storage); 1578 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1579 /* Add start and end handlers for coverage */ 1580 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1581 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1582 1583 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1584 == XML_STATUS_ERROR) 1585 xml_failure(g_parser); 1586 CharData_CheckXMLChars(&storage, expected); 1587 1588 /* Try again, this time with a default handler */ 1589 XML_ParserReset(g_parser, NULL); 1590 CharData_Init(&storage); 1591 XML_SetUserData(g_parser, &storage); 1592 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1593 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1594 1595 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1596 == XML_STATUS_ERROR) 1597 xml_failure(g_parser); 1598 CharData_CheckXMLChars(&storage, expected); 1599 } 1600 END_TEST 1601 1602 START_TEST(test_good_cdata_utf16) { 1603 /* Test data is: 1604 * <?xml version='1.0' encoding='utf-16'?> 1605 * <a><![CDATA[hello]]></a> 1606 */ 1607 const char text[] 1608 = 
"\0<\0?\0x\0m\0l\0" 1609 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1610 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1611 "1\0" 1612 "6\0'" 1613 "\0?\0>\0\n" 1614 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; 1615 const XML_Char *expected = XCS("hello"); 1616 1617 CharData storage; 1618 CharData_Init(&storage); 1619 XML_SetUserData(g_parser, &storage); 1620 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1621 1622 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1623 == XML_STATUS_ERROR) 1624 xml_failure(g_parser); 1625 CharData_CheckXMLChars(&storage, expected); 1626 } 1627 END_TEST 1628 1629 START_TEST(test_good_cdata_utf16_le) { 1630 /* Test data is: 1631 * <?xml version='1.0' encoding='utf-16'?> 1632 * <a><![CDATA[hello]]></a> 1633 */ 1634 const char text[] 1635 = "<\0?\0x\0m\0l\0" 1636 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1637 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1638 "1\0" 1639 "6\0'" 1640 "\0?\0>\0\n" 1641 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; 1642 const XML_Char *expected = XCS("hello"); 1643 1644 CharData storage; 1645 CharData_Init(&storage); 1646 XML_SetUserData(g_parser, &storage); 1647 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1648 1649 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1650 == XML_STATUS_ERROR) 1651 xml_failure(g_parser); 1652 CharData_CheckXMLChars(&storage, expected); 1653 } 1654 END_TEST 1655 1656 /* Test UTF16 conversion of a long cdata string */ 1657 1658 /* 16 characters: handy macro to reduce visual clutter */ 1659 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" 1660 1661 START_TEST(test_long_cdata_utf16) { 1662 /* Test data is: 1663 * <?xlm version='1.0' encoding='utf-16'?> 1664 * <a><![CDATA[ 1665 * ABCDEFGHIJKLMNOP 1666 * ]]></a> 1667 */ 1668 const char text[] 1669 = "\0<\0?\0x\0m\0l\0 " 1670 
"\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " 1671 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" 1672 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1673 /* 64 characters per line */ 1674 /* clang-format off */ 1675 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1676 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1677 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1678 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1679 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1680 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1681 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1682 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1683 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1684 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1685 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1686 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1687 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1688 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1689 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1690 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1691 A_TO_P_IN_UTF16 1692 /* clang-format on */ 1693 "\0]\0]\0>\0<\0/\0a\0>"; 1694 const XML_Char *expected = 1695 /* clang-format off */ 1696 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1697 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1698 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1699 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1700 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1701 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1702 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1703 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1704 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1705 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1706 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1707 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1708 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1709 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1710 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1711 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1712 XCS("ABCDEFGHIJKLMNOP"); 1713 /* clang-format on */ 1714 CharData storage; 1715 void *buffer; 1716 1717 CharData_Init(&storage); 1718 XML_SetUserData(g_parser, &storage); 1719 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1720 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1); 1721 if (buffer == NULL) 1722 fail("Could not allocate parse buffer"); 1723 assert(buffer != NULL); 1724 memcpy(buffer, text, sizeof(text) - 1); 1725 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) 1726 xml_failure(g_parser); 1727 CharData_CheckXMLChars(&storage, expected); 1728 } 1729 END_TEST 1730 1731 /* Test handling of multiple unit UTF-16 characters */ 1732 START_TEST(test_multichar_cdata_utf16) { 1733 /* Test data is: 1734 * <?xml version='1.0' encoding='utf-16'?> 1735 * <a><![CDATA[{MINIM}{CROTCHET}]]></a> 1736 * 1737 * where {MINIM} is U+1d15e (a minim or half-note) 1738 * UTF-16: 0xd834 0xdd5e 1739 * UTF-8: 0xf0 0x9d 0x85 0x9e 1740 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) 1741 * UTF-16: 0xd834 0xdd5f 1742 * UTF-8: 0xf0 0x9d 0x85 0x9f 1743 */ 1744 const char text[] = "\0<\0?\0x\0m\0l\0" 1745 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1746 " 
\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1747 "1\0" 1748 "6\0'" 1749 "\0?\0>\0\n" 1750 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1751 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" 1752 "\0]\0]\0>\0<\0/\0a\0>"; 1753 #ifdef XML_UNICODE 1754 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); 1755 #else 1756 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); 1757 #endif 1758 CharData storage; 1759 1760 CharData_Init(&storage); 1761 XML_SetUserData(g_parser, &storage); 1762 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1763 1764 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1765 == XML_STATUS_ERROR) 1766 xml_failure(g_parser); 1767 CharData_CheckXMLChars(&storage, expected); 1768 } 1769 END_TEST 1770 1771 /* Test that an element name with a UTF-16 surrogate pair is rejected */ 1772 START_TEST(test_utf16_bad_surrogate_pair) { 1773 /* Test data is: 1774 * <?xml version='1.0' encoding='utf-16'?> 1775 * <a><![CDATA[{BADLINB}]]></a> 1776 * 1777 * where {BADLINB} is U+10000 (the first Linear B character) 1778 * with the UTF-16 surrogate pair in the wrong order, i.e. 
1779 * 0xdc00 0xd800 1780 */ 1781 const char text[] = "\0<\0?\0x\0m\0l\0" 1782 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1783 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1784 "1\0" 1785 "6\0'" 1786 "\0?\0>\0\n" 1787 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1788 "\xdc\x00\xd8\x00" 1789 "\0]\0]\0>\0<\0/\0a\0>"; 1790 1791 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1792 != XML_STATUS_ERROR) 1793 fail("Reversed UTF-16 surrogate pair not faulted"); 1794 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 1795 xml_failure(g_parser); 1796 } 1797 END_TEST 1798 1799 START_TEST(test_bad_cdata) { 1800 struct CaseData { 1801 const char *text; 1802 enum XML_Error expectedError; 1803 }; 1804 1805 struct CaseData cases[] 1806 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN}, 1807 {"<a><!", XML_ERROR_UNCLOSED_TOKEN}, 1808 {"<a><![", XML_ERROR_UNCLOSED_TOKEN}, 1809 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN}, 1810 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN}, 1811 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN}, 1812 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN}, 1813 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN}, 1814 1815 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1816 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1817 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1818 1819 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN}, 1820 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1821 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! 
*/ 1822 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN}, 1823 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN}, 1824 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN}, 1825 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN}, 1826 1827 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1828 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1829 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1830 1831 size_t i = 0; 1832 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1833 set_subtest("%s", cases[i].text); 1834 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( 1835 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); 1836 const enum XML_Error actualError = XML_GetErrorCode(g_parser); 1837 1838 assert(actualStatus == XML_STATUS_ERROR); 1839 1840 if (actualError != cases[i].expectedError) { 1841 char message[100]; 1842 snprintf(message, sizeof(message), 1843 "Expected error %d but got error %d for case %u: \"%s\"\n", 1844 cases[i].expectedError, actualError, (unsigned int)i + 1, 1845 cases[i].text); 1846 fail(message); 1847 } 1848 1849 XML_ParserReset(g_parser, NULL); 1850 } 1851 } 1852 END_TEST 1853 1854 /* Test failures in UTF-16 CDATA */ 1855 START_TEST(test_bad_cdata_utf16) { 1856 struct CaseData { 1857 size_t text_bytes; 1858 const char *text; 1859 enum XML_Error expected_error; 1860 }; 1861 1862 const char prolog[] = "\0<\0?\0x\0m\0l\0" 1863 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1864 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1865 "1\0" 1866 "6\0'" 1867 "\0?\0>\0\n" 1868 "\0<\0a\0>"; 1869 struct CaseData cases[] = { 1870 {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, 1871 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, 1872 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, 1873 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, 1874 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, 1875 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, 1876 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, 1877 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, 1878 {9, "\0<\0!\0[\0C\0", 
XML_ERROR_UNCLOSED_TOKEN}, 1879 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, 1880 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, 1881 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, 1882 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1883 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, 1884 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, 1885 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, 1886 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1887 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1888 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1889 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1890 /* Now add a four-byte UTF-16 character */ 1891 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", 1892 XML_ERROR_UNCLOSED_CDATA_SECTION}, 1893 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR}, 1894 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", 1895 XML_ERROR_PARTIAL_CHAR}, 1896 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", 1897 XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1898 size_t i; 1899 1900 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1901 set_subtest("case %lu", (long unsigned)(i + 1)); 1902 enum XML_Status actual_status; 1903 enum XML_Error actual_error; 1904 1905 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1, 1906 XML_FALSE) 1907 == XML_STATUS_ERROR) 1908 xml_failure(g_parser); 1909 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text, 1910 (int)cases[i].text_bytes, XML_TRUE); 1911 assert(actual_status == XML_STATUS_ERROR); 1912 actual_error = XML_GetErrorCode(g_parser); 1913 if (actual_error != cases[i].expected_error) { 1914 char message[1024]; 1915 1916 snprintf(message, sizeof(message), 1917 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR 1918 ") for case %lu\n", 1919 cases[i].expected_error, 1920 
XML_ErrorString(cases[i].expected_error), actual_error, 1921 XML_ErrorString(actual_error), (long unsigned)(i + 1)); 1922 fail(message); 1923 } 1924 XML_ParserReset(g_parser, NULL); 1925 } 1926 } 1927 END_TEST 1928 1929 /* Test stopping the parser in cdata handler */ 1930 START_TEST(test_stop_parser_between_cdata_calls) { 1931 const char *text = long_cdata_text; 1932 1933 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1934 g_resumable = XML_FALSE; 1935 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler"); 1936 } 1937 END_TEST 1938 1939 /* Test suspending the parser in cdata handler */ 1940 START_TEST(test_suspend_parser_between_cdata_calls) { 1941 if (g_chunkSize != 0) { 1942 // this test does not use SINGLE_BYTES, because of suspension 1943 return; 1944 } 1945 1946 const char *text = long_cdata_text; 1947 enum XML_Status result; 1948 1949 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1950 g_resumable = XML_TRUE; 1951 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 1952 // we won't know exactly how much input we actually managed to give Expat. 1953 result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE); 1954 if (result != XML_STATUS_SUSPENDED) { 1955 if (result == XML_STATUS_ERROR) 1956 xml_failure(g_parser); 1957 fail("Parse not suspended in CDATA handler"); 1958 } 1959 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1960 xml_failure(g_parser); 1961 } 1962 END_TEST 1963 1964 /* Test memory allocation functions */ 1965 START_TEST(test_memory_allocation) { 1966 char *buffer = (char *)XML_MemMalloc(g_parser, 256); 1967 char *p; 1968 1969 if (buffer == NULL) { 1970 fail("Allocation failed"); 1971 } else { 1972 /* Try writing to memory; some OSes try to cheat! 
*/ 1973 buffer[0] = 'T'; 1974 buffer[1] = 'E'; 1975 buffer[2] = 'S'; 1976 buffer[3] = 'T'; 1977 buffer[4] = '\0'; 1978 if (strcmp(buffer, "TEST") != 0) { 1979 fail("Memory not writable"); 1980 } else { 1981 p = (char *)XML_MemRealloc(g_parser, buffer, 512); 1982 if (p == NULL) { 1983 fail("Reallocation failed"); 1984 } else { 1985 /* Write again, just to be sure */ 1986 buffer = p; 1987 buffer[0] = 'V'; 1988 if (strcmp(buffer, "VEST") != 0) { 1989 fail("Reallocated memory not writable"); 1990 } 1991 } 1992 } 1993 XML_MemFree(g_parser, buffer); 1994 } 1995 } 1996 END_TEST 1997 1998 /* Test XML_DefaultCurrent() passes handling on correctly */ 1999 START_TEST(test_default_current) { 2000 const char *text = "<doc>hell]</doc>"; 2001 const char *entity_text = "<!DOCTYPE doc [\n" 2002 "<!ENTITY entity '%'>\n" 2003 "]>\n" 2004 "<doc>&entity;</doc>"; 2005 2006 set_subtest("with defaulting"); 2007 { 2008 struct handler_record_list storage; 2009 storage.count = 0; 2010 XML_SetDefaultHandler(g_parser, record_default_handler); 2011 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2012 XML_SetUserData(g_parser, &storage); 2013 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2014 == XML_STATUS_ERROR) 2015 xml_failure(g_parser); 2016 int i = 0; 2017 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 2018 // we should have gotten one or more cdata callbacks, totaling 5 chars 2019 int cdata_len_remaining = 5; 2020 while (cdata_len_remaining > 0) { 2021 const struct handler_record_entry *c_entry 2022 = handler_record_get(&storage, i++); 2023 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0); 2024 assert_true(c_entry->arg > 0); 2025 assert_true(c_entry->arg <= cdata_len_remaining); 2026 cdata_len_remaining -= c_entry->arg; 2027 // default handler must follow, with the exact same len argument. 
2028 assert_record_handler_called(&storage, i++, "record_default_handler", 2029 c_entry->arg); 2030 } 2031 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 2032 assert_true(storage.count == i); 2033 } 2034 2035 /* Again, without the defaulting */ 2036 set_subtest("no defaulting"); 2037 { 2038 struct handler_record_list storage; 2039 storage.count = 0; 2040 XML_ParserReset(g_parser, NULL); 2041 XML_SetDefaultHandler(g_parser, record_default_handler); 2042 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2043 XML_SetUserData(g_parser, &storage); 2044 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2045 == XML_STATUS_ERROR) 2046 xml_failure(g_parser); 2047 int i = 0; 2048 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 2049 // we should have gotten one or more cdata callbacks, totaling 5 chars 2050 int cdata_len_remaining = 5; 2051 while (cdata_len_remaining > 0) { 2052 const struct handler_record_entry *c_entry 2053 = handler_record_get(&storage, i++); 2054 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); 2055 assert_true(c_entry->arg > 0); 2056 assert_true(c_entry->arg <= cdata_len_remaining); 2057 cdata_len_remaining -= c_entry->arg; 2058 } 2059 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 2060 assert_true(storage.count == i); 2061 } 2062 2063 /* Now with an internal entity to complicate matters */ 2064 set_subtest("with internal entity"); 2065 { 2066 struct handler_record_list storage; 2067 storage.count = 0; 2068 XML_ParserReset(g_parser, NULL); 2069 XML_SetDefaultHandler(g_parser, record_default_handler); 2070 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2071 XML_SetUserData(g_parser, &storage); 2072 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2073 XML_TRUE) 2074 == XML_STATUS_ERROR) 2075 xml_failure(g_parser); 2076 /* The default handler suppresses the entity */ 
2077 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2078 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2079 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2080 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2081 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2082 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2083 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2084 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2085 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2086 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2087 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2088 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2089 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2090 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2091 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2092 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2093 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2094 assert_record_handler_called(&storage, 17, "record_default_handler", 8); 2095 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2096 assert_true(storage.count == 19); 2097 } 2098 2099 /* Again, with a skip handler */ 2100 set_subtest("with skip handler"); 2101 { 2102 struct handler_record_list storage; 2103 storage.count = 0; 2104 XML_ParserReset(g_parser, NULL); 2105 XML_SetDefaultHandler(g_parser, record_default_handler); 2106 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2107 XML_SetSkippedEntityHandler(g_parser, record_skip_handler); 2108 XML_SetUserData(g_parser, &storage); 2109 if (_XML_Parse_SINGLE_BYTES(g_parser, 
entity_text, (int)strlen(entity_text), 2110 XML_TRUE) 2111 == XML_STATUS_ERROR) 2112 xml_failure(g_parser); 2113 /* The default handler suppresses the entity */ 2114 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2115 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2116 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2117 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2118 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2119 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2120 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2121 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2122 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2123 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2124 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2125 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2126 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2127 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2128 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2129 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2130 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2131 assert_record_handler_called(&storage, 17, "record_skip_handler", 0); 2132 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2133 assert_true(storage.count == 19); 2134 } 2135 2136 /* This time, allow the entity through */ 2137 set_subtest("allow entity"); 2138 { 2139 struct handler_record_list storage; 2140 storage.count = 0; 2141 XML_ParserReset(g_parser, NULL); 2142 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2143 XML_SetCharacterDataHandler(g_parser, 
record_cdata_handler); 2144 XML_SetUserData(g_parser, &storage); 2145 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2146 XML_TRUE) 2147 == XML_STATUS_ERROR) 2148 xml_failure(g_parser); 2149 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2150 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2151 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2152 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2153 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2154 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2155 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2156 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2157 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2158 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2159 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2160 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2161 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2162 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2163 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2164 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2165 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2166 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); 2167 assert_record_handler_called(&storage, 18, "record_default_handler", 1); 2168 assert_record_handler_called(&storage, 19, "record_default_handler", 6); 2169 assert_true(storage.count == 20); 2170 } 2171 2172 /* Finally, without passing the cdata to the default handler */ 2173 set_subtest("not passing cdata"); 2174 { 2175 struct handler_record_list storage; 2176 storage.count = 0; 2177 
XML_ParserReset(g_parser, NULL); 2178 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2179 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2180 XML_SetUserData(g_parser, &storage); 2181 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2182 XML_TRUE) 2183 == XML_STATUS_ERROR) 2184 xml_failure(g_parser); 2185 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2186 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2187 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2188 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2189 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2190 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2191 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2192 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2193 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2194 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2195 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2196 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2197 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2198 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2199 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2200 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2201 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2202 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", 2203 1); 2204 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2205 assert_true(storage.count == 19); 2206 } 2207 } 2208 END_TEST 2209 2210 /* Test DTD element parsing code paths */ 2211 
START_TEST(test_dtd_elements) { 2212 const char *text = "<!DOCTYPE doc [\n" 2213 "<!ELEMENT doc (chapter)>\n" 2214 "<!ELEMENT chapter (#PCDATA)>\n" 2215 "]>\n" 2216 "<doc><chapter>Wombats are go</chapter></doc>"; 2217 2218 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 2219 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2220 == XML_STATUS_ERROR) 2221 xml_failure(g_parser); 2222 } 2223 END_TEST 2224 2225 static void XMLCALL 2226 element_decl_check_model(void *userData, const XML_Char *name, 2227 XML_Content *model) { 2228 UNUSED_P(userData); 2229 uint32_t errorFlags = 0; 2230 2231 /* Expected model array structure is this: 2232 * [0] (type 6, quant 0) 2233 * [1] (type 5, quant 0) 2234 * [3] (type 4, quant 0, name "bar") 2235 * [4] (type 4, quant 0, name "foo") 2236 * [5] (type 4, quant 3, name "xyz") 2237 * [2] (type 4, quant 2, name "zebra") 2238 */ 2239 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); 2240 errorFlags |= ((model != NULL) ? 0 : (1u << 1)); 2241 2242 if (model != NULL) { 2243 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); 2244 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); 2245 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); 2246 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); 2247 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); 2248 2249 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); 2250 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); 2251 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); 2252 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); 2253 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); 2254 2255 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); 2256 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); 2257 errorFlags |= ((model[2].numchildren == 0) ? 
0 : (1u << 14)); 2258 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); 2259 errorFlags 2260 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); 2261 2262 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); 2263 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); 2264 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); 2265 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); 2266 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); 2267 2268 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); 2269 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); 2270 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); 2271 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); 2272 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); 2273 2274 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); 2275 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); 2276 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); 2277 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); 2278 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 
0 : (1u << 31)); 2279 } 2280 2281 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); 2282 XML_FreeContentModel(g_parser, model); 2283 } 2284 2285 START_TEST(test_dtd_elements_nesting) { 2286 // Payload inspired by a test in Perl's XML::Parser 2287 const char *text = "<!DOCTYPE foo [\n" 2288 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n" 2289 "]>\n" 2290 "<foo/>"; 2291 2292 XML_SetUserData(g_parser, (void *)(uintptr_t)-1); 2293 2294 XML_SetElementDeclHandler(g_parser, element_decl_check_model); 2295 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2296 == XML_STATUS_ERROR) 2297 xml_failure(g_parser); 2298 2299 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) 2300 fail("Element declaration model regression detected"); 2301 } 2302 END_TEST 2303 2304 /* Test foreign DTD handling */ 2305 START_TEST(test_set_foreign_dtd) { 2306 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"; 2307 const char *text2 = "<doc>&entity;</doc>"; 2308 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2309 2310 /* Check hash salt is passed through too */ 2311 XML_SetHashSalt(g_parser, 0x12345678); 2312 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2313 XML_SetUserData(g_parser, &test_data); 2314 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2315 /* Add a default handler to exercise more code paths */ 2316 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2317 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2318 fail("Could not set foreign DTD"); 2319 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2320 == XML_STATUS_ERROR) 2321 xml_failure(g_parser); 2322 2323 /* Ensure that trying to set the DTD after parsing has started 2324 * is faulted, even if it's the same setting. 
2325 */ 2326 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2327 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2328 fail("Failed to reject late foreign DTD setting"); 2329 /* Ditto for the hash salt */ 2330 if (XML_SetHashSalt(g_parser, 0x23456789)) 2331 fail("Failed to reject late hash salt change"); 2332 2333 /* Now finish the parse */ 2334 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2335 == XML_STATUS_ERROR) 2336 xml_failure(g_parser); 2337 } 2338 END_TEST 2339 2340 /* Test foreign DTD handling with a failing NotStandalone handler */ 2341 START_TEST(test_foreign_dtd_not_standalone) { 2342 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2343 "<doc>&entity;</doc>"; 2344 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2345 2346 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2347 XML_SetUserData(g_parser, &test_data); 2348 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2349 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 2350 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2351 fail("Could not set foreign DTD"); 2352 expect_failure(text, XML_ERROR_NOT_STANDALONE, 2353 "NotStandalonehandler failed to reject"); 2354 } 2355 END_TEST 2356 2357 /* Test invalid character in a foreign DTD is faulted */ 2358 START_TEST(test_invalid_foreign_dtd) { 2359 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2360 "<doc>&entity;</doc>"; 2361 ExtFaults test_data 2362 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN}; 2363 2364 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2365 XML_SetUserData(g_parser, &test_data); 2366 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 2367 XML_UseForeignDTD(g_parser, XML_TRUE); 2368 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2369 "Bad DTD should not have been accepted"); 2370 } 2371 END_TEST 2372 2373 /* Test foreign DTD use with a 
doctype */ 2374 START_TEST(test_foreign_dtd_with_doctype) { 2375 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n" 2376 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n"; 2377 const char *text2 = "<doc>&entity;</doc>"; 2378 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2379 2380 /* Check hash salt is passed through too */ 2381 XML_SetHashSalt(g_parser, 0x12345678); 2382 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2383 XML_SetUserData(g_parser, &test_data); 2384 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2385 /* Add a default handler to exercise more code paths */ 2386 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2387 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2388 fail("Could not set foreign DTD"); 2389 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2390 == XML_STATUS_ERROR) 2391 xml_failure(g_parser); 2392 2393 /* Ensure that trying to set the DTD after parsing has started 2394 * is faulted, even if it's the same setting. 
2395 */ 2396 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2397 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2398 fail("Failed to reject late foreign DTD setting"); 2399 /* Ditto for the hash salt */ 2400 if (XML_SetHashSalt(g_parser, 0x23456789)) 2401 fail("Failed to reject late hash salt change"); 2402 2403 /* Now finish the parse */ 2404 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2405 == XML_STATUS_ERROR) 2406 xml_failure(g_parser); 2407 } 2408 END_TEST 2409 2410 /* Test XML_UseForeignDTD with no external subset present */ 2411 START_TEST(test_foreign_dtd_without_external_subset) { 2412 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n" 2413 "<doc>&foo;</doc>"; 2414 2415 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2416 XML_SetUserData(g_parser, NULL); 2417 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2418 XML_UseForeignDTD(g_parser, XML_TRUE); 2419 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2420 == XML_STATUS_ERROR) 2421 xml_failure(g_parser); 2422 } 2423 END_TEST 2424 2425 START_TEST(test_empty_foreign_dtd) { 2426 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2427 "<doc>&entity;</doc>"; 2428 2429 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2430 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2431 XML_UseForeignDTD(g_parser, XML_TRUE); 2432 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 2433 "Undefined entity not faulted"); 2434 } 2435 END_TEST 2436 2437 /* Test XML Base is set and unset appropriately */ 2438 START_TEST(test_set_base) { 2439 const XML_Char *old_base; 2440 const XML_Char *new_base = XCS("/local/file/name.xml"); 2441 2442 old_base = XML_GetBase(g_parser); 2443 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK) 2444 fail("Unable to set base"); 2445 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0) 2446 fail("Base setting not correct"); 2447 if 
(XML_SetBase(g_parser, NULL) != XML_STATUS_OK) 2448 fail("Unable to NULL base"); 2449 if (XML_GetBase(g_parser) != NULL) 2450 fail("Base setting not nulled"); 2451 XML_SetBase(g_parser, old_base); 2452 } 2453 END_TEST 2454 2455 /* Test attribute counts, indexing, etc */ 2456 START_TEST(test_attributes) { 2457 const char *text = "<!DOCTYPE doc [\n" 2458 "<!ELEMENT doc (tag)>\n" 2459 "<!ATTLIST doc id ID #REQUIRED>\n" 2460 "]>" 2461 "<doc a='1' id='one' b='2'>" 2462 "<tag c='3'/>" 2463 "</doc>"; 2464 AttrInfo doc_info[] = {{XCS("a"), XCS("1")}, 2465 {XCS("b"), XCS("2")}, 2466 {XCS("id"), XCS("one")}, 2467 {NULL, NULL}}; 2468 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}}; 2469 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL}, 2470 {XCS("tag"), 1, NULL, NULL}, 2471 {NULL, 0, NULL, NULL}}; 2472 info[0].attributes = doc_info; 2473 info[1].attributes = tag_info; 2474 2475 XML_Parser parser = XML_ParserCreate(NULL); 2476 assert_true(parser != NULL); 2477 ParserAndElementInfo parserAndElementInfos = { 2478 parser, 2479 info, 2480 }; 2481 2482 XML_SetStartElementHandler(parser, counting_start_element_handler); 2483 XML_SetUserData(parser, &parserAndElementInfos); 2484 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2485 == XML_STATUS_ERROR) 2486 xml_failure(parser); 2487 2488 XML_ParserFree(parser); 2489 } 2490 END_TEST 2491 2492 /* Test reset works correctly in the middle of processing an internal 2493 * entity. Exercises some obscure code in XML_ParserReset(). 
2494 */ 2495 START_TEST(test_reset_in_entity) { 2496 if (g_chunkSize != 0) { 2497 // this test does not use SINGLE_BYTES, because of suspension 2498 return; 2499 } 2500 2501 const char *text = "<!DOCTYPE doc [\n" 2502 "<!ENTITY wombat 'wom'>\n" 2503 "<!ENTITY entity 'hi &wom; there'>\n" 2504 "]>\n" 2505 "<doc>&entity;</doc>"; 2506 XML_ParsingStatus status; 2507 2508 g_resumable = XML_TRUE; 2509 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2510 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 2511 // we won't know exactly how much input we actually managed to give Expat. 2512 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2513 == XML_STATUS_ERROR) 2514 xml_failure(g_parser); 2515 XML_GetParsingStatus(g_parser, &status); 2516 if (status.parsing != XML_SUSPENDED) 2517 fail("Parsing status not SUSPENDED"); 2518 XML_ParserReset(g_parser, NULL); 2519 XML_GetParsingStatus(g_parser, &status); 2520 if (status.parsing != XML_INITIALIZED) 2521 fail("Parsing status doesn't reset to INITIALIZED"); 2522 } 2523 END_TEST 2524 2525 /* Test that resume correctly passes through parse errors */ 2526 START_TEST(test_resume_invalid_parse) { 2527 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */ 2528 2529 g_resumable = XML_TRUE; 2530 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2531 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2532 == XML_STATUS_ERROR) 2533 xml_failure(g_parser); 2534 if (XML_ResumeParser(g_parser) == XML_STATUS_OK) 2535 fail("Resumed invalid parse not faulted"); 2536 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN) 2537 fail("Invalid parse not correctly faulted"); 2538 } 2539 END_TEST 2540 2541 /* Test that re-suspended parses are correctly passed through */ 2542 START_TEST(test_resume_resuspended) { 2543 const char *text = "<doc>Hello<meep/>world</doc>"; 2544 2545 g_resumable = XML_TRUE; 2546 
XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2547 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2548 == XML_STATUS_ERROR) 2549 xml_failure(g_parser); 2550 g_resumable = XML_TRUE; 2551 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2552 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 2553 fail("Resumption not suspended"); 2554 /* This one should succeed and finish up */ 2555 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 2556 xml_failure(g_parser); 2557 } 2558 END_TEST 2559 2560 /* Test that CDATA shows up correctly through a default handler */ 2561 START_TEST(test_cdata_default) { 2562 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>"; 2563 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>"); 2564 CharData storage; 2565 2566 CharData_Init(&storage); 2567 XML_SetUserData(g_parser, &storage); 2568 XML_SetDefaultHandler(g_parser, accumulate_characters); 2569 2570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2571 == XML_STATUS_ERROR) 2572 xml_failure(g_parser); 2573 CharData_CheckXMLChars(&storage, expected); 2574 } 2575 END_TEST 2576 2577 /* Test resetting a subordinate parser does exactly nothing */ 2578 START_TEST(test_subordinate_reset) { 2579 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2580 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2581 "<doc>&entity;</doc>"; 2582 2583 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2584 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter); 2585 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2586 == XML_STATUS_ERROR) 2587 xml_failure(g_parser); 2588 } 2589 END_TEST 2590 2591 /* Test suspending a subordinate parser */ 2592 START_TEST(test_subordinate_suspend) { 2593 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2594 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2595 "<doc>&entity;</doc>"; 2596 2597 
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2598 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender); 2599 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2600 == XML_STATUS_ERROR) 2601 xml_failure(g_parser); 2602 } 2603 END_TEST 2604 2605 /* Test suspending a subordinate parser from an XML declaration */ 2606 /* Increases code coverage of the tests */ 2607 2608 START_TEST(test_subordinate_xdecl_suspend) { 2609 const char *text 2610 = "<!DOCTYPE doc [\n" 2611 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2612 "]>\n" 2613 "<doc>&entity;</doc>"; 2614 2615 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2616 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2617 g_resumable = XML_TRUE; 2618 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2619 == XML_STATUS_ERROR) 2620 xml_failure(g_parser); 2621 } 2622 END_TEST 2623 2624 START_TEST(test_subordinate_xdecl_abort) { 2625 const char *text 2626 = "<!DOCTYPE doc [\n" 2627 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2628 "]>\n" 2629 "<doc>&entity;</doc>"; 2630 2631 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2632 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2633 g_resumable = XML_FALSE; 2634 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2635 == XML_STATUS_ERROR) 2636 xml_failure(g_parser); 2637 } 2638 END_TEST 2639 2640 /* Test external entity fault handling with suspension */ 2641 START_TEST(test_ext_entity_invalid_suspended_parse) { 2642 const char *text = "<!DOCTYPE doc [\n" 2643 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2644 "]>\n" 2645 "<doc>&en;</doc>"; 2646 ExtFaults faults[] 2647 = {{"<?xml version='1.0' encoding='us-ascii'?><", 2648 "Incomplete element declaration not faulted", NULL, 2649 XML_ERROR_UNCLOSED_TOKEN}, 2650 {/* First two bytes of a 
three-byte char */ 2651 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82", 2652 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 2653 {NULL, NULL, NULL, XML_ERROR_NONE}}; 2654 ExtFaults *fault; 2655 2656 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 2657 set_subtest("%s", fault->parse_text); 2658 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2659 XML_SetExternalEntityRefHandler(g_parser, 2660 external_entity_suspending_faulter); 2661 XML_SetUserData(g_parser, fault); 2662 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2663 "Parser did not report external entity error"); 2664 XML_ParserReset(g_parser, NULL); 2665 } 2666 } 2667 END_TEST 2668 2669 /* Test setting an explicit encoding */ 2670 START_TEST(test_explicit_encoding) { 2671 const char *text1 = "<doc>Hello "; 2672 const char *text2 = " World</doc>"; 2673 2674 /* Just check that we can set the encoding to NULL before starting */ 2675 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2676 fail("Failed to initialise encoding to NULL"); 2677 /* Say we are UTF-8 */ 2678 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK) 2679 fail("Failed to set explicit encoding"); 2680 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2681 == XML_STATUS_ERROR) 2682 xml_failure(g_parser); 2683 /* Try to switch encodings mid-parse */ 2684 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR) 2685 fail("Allowed encoding change"); 2686 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2687 == XML_STATUS_ERROR) 2688 xml_failure(g_parser); 2689 /* Try now the parse is over */ 2690 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2691 fail("Failed to unset encoding"); 2692 } 2693 END_TEST 2694 2695 /* Test handling of trailing CR (rather than newline) */ 2696 START_TEST(test_trailing_cr) { 2697 const char *text = "<doc>\r"; 2698 int found_cr; 2699 2700 /* Try with a character handler, 
for code coverage */ 2701 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler); 2702 XML_SetUserData(g_parser, &found_cr); 2703 found_cr = 0; 2704 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2705 == XML_STATUS_OK) 2706 fail("Failed to fault unclosed doc"); 2707 if (found_cr == 0) 2708 fail("Did not catch the carriage return"); 2709 XML_ParserReset(g_parser, NULL); 2710 2711 /* Now with a default handler instead */ 2712 XML_SetDefaultHandler(g_parser, cr_cdata_handler); 2713 XML_SetUserData(g_parser, &found_cr); 2714 found_cr = 0; 2715 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2716 == XML_STATUS_OK) 2717 fail("Failed to fault unclosed doc"); 2718 if (found_cr == 0) 2719 fail("Did not catch default carriage return"); 2720 } 2721 END_TEST 2722 2723 /* Test trailing CR in an external entity parse */ 2724 START_TEST(test_ext_entity_trailing_cr) { 2725 const char *text = "<!DOCTYPE doc [\n" 2726 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2727 "]>\n" 2728 "<doc>&en;</doc>"; 2729 int found_cr; 2730 2731 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2732 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher); 2733 XML_SetUserData(g_parser, &found_cr); 2734 found_cr = 0; 2735 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2736 != XML_STATUS_OK) 2737 xml_failure(g_parser); 2738 if (found_cr == 0) 2739 fail("No carriage return found"); 2740 XML_ParserReset(g_parser, NULL); 2741 2742 /* Try again with a different trailing CR */ 2743 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2744 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher); 2745 XML_SetUserData(g_parser, &found_cr); 2746 found_cr = 0; 2747 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2748 != XML_STATUS_OK) 2749 xml_failure(g_parser); 2750 if (found_cr == 0) 2751 fail("No carriage return found"); 2752 } 
2753 END_TEST 2754 2755 /* Test handling of trailing square bracket */ 2756 START_TEST(test_trailing_rsqb) { 2757 const char *text8 = "<doc>]"; 2758 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; 2759 int found_rsqb; 2760 int text8_len = (int)strlen(text8); 2761 2762 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2763 XML_SetUserData(g_parser, &found_rsqb); 2764 found_rsqb = 0; 2765 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE) 2766 == XML_STATUS_OK) 2767 fail("Failed to fault unclosed doc"); 2768 if (found_rsqb == 0) 2769 fail("Did not catch the right square bracket"); 2770 2771 /* Try again with a different encoding */ 2772 XML_ParserReset(g_parser, NULL); 2773 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2774 XML_SetUserData(g_parser, &found_rsqb); 2775 found_rsqb = 0; 2776 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2777 XML_TRUE) 2778 == XML_STATUS_OK) 2779 fail("Failed to fault unclosed doc"); 2780 if (found_rsqb == 0) 2781 fail("Did not catch the right square bracket"); 2782 2783 /* And finally with a default handler */ 2784 XML_ParserReset(g_parser, NULL); 2785 XML_SetDefaultHandler(g_parser, rsqb_handler); 2786 XML_SetUserData(g_parser, &found_rsqb); 2787 found_rsqb = 0; 2788 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2789 XML_TRUE) 2790 == XML_STATUS_OK) 2791 fail("Failed to fault unclosed doc"); 2792 if (found_rsqb == 0) 2793 fail("Did not catch the right square bracket"); 2794 } 2795 END_TEST 2796 2797 /* Test trailing right square bracket in an external entity parse */ 2798 START_TEST(test_ext_entity_trailing_rsqb) { 2799 const char *text = "<!DOCTYPE doc [\n" 2800 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2801 "]>\n" 2802 "<doc>&en;</doc>"; 2803 int found_rsqb; 2804 2805 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2806 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher); 2807 
XML_SetUserData(g_parser, &found_rsqb); 2808 found_rsqb = 0; 2809 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2810 != XML_STATUS_OK) 2811 xml_failure(g_parser); 2812 if (found_rsqb == 0) 2813 fail("No right square bracket found"); 2814 } 2815 END_TEST 2816 2817 /* Test CDATA handling in an external entity */ 2818 START_TEST(test_ext_entity_good_cdata) { 2819 const char *text = "<!DOCTYPE doc [\n" 2820 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2821 "]>\n" 2822 "<doc>&en;</doc>"; 2823 2824 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2825 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii); 2826 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2827 != XML_STATUS_OK) 2828 xml_failure(g_parser); 2829 } 2830 END_TEST 2831 2832 /* Test user parameter settings */ 2833 START_TEST(test_user_parameters) { 2834 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2835 "<!-- Primary parse -->\n" 2836 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2837 "<doc>&entity;"; 2838 const char *epilog = "<!-- Back to primary parser -->\n" 2839 "</doc>"; 2840 2841 g_comment_count = 0; 2842 g_skip_count = 0; 2843 g_xdecl_count = 0; 2844 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2845 XML_SetXmlDeclHandler(g_parser, xml_decl_handler); 2846 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker); 2847 XML_SetCommentHandler(g_parser, data_check_comment_handler); 2848 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler); 2849 XML_UseParserAsHandlerArg(g_parser); 2850 XML_SetUserData(g_parser, (void *)1); 2851 g_handler_data = g_parser; 2852 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2853 == XML_STATUS_ERROR) 2854 xml_failure(g_parser); 2855 /* Ensure we can't change policy mid-parse */ 2856 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) 2857 fail("Changed param entity 
parsing policy while parsing"); 2858 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) 2859 == XML_STATUS_ERROR) 2860 xml_failure(g_parser); 2861 if (g_comment_count != 3) 2862 fail("Comment handler not invoked enough times"); 2863 if (g_skip_count != 1) 2864 fail("Skip handler not invoked enough times"); 2865 if (g_xdecl_count != 1) 2866 fail("XML declaration handler not invoked"); 2867 } 2868 END_TEST 2869 2870 /* Test that an explicit external entity handler argument replaces 2871 * the parser as the first argument. 2872 * 2873 * We do not call the first parameter to the external entity handler 2874 * 'parser' for once, since the first time the handler is called it 2875 * will actually be a text string. We need to be able to access the 2876 * global 'parser' variable to create our external entity parser from, 2877 * since there are code paths we need to ensure get executed. 2878 */ 2879 START_TEST(test_ext_entity_ref_parameter) { 2880 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2881 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2882 "<doc>&entity;</doc>"; 2883 2884 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2885 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2886 /* Set a handler arg that is not NULL and not parser (which is 2887 * what NULL would cause to be passed. 
2888 */ 2889 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text); 2890 g_handler_data = text; 2891 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2892 == XML_STATUS_ERROR) 2893 xml_failure(g_parser); 2894 2895 /* Now try again with unset args */ 2896 XML_ParserReset(g_parser, NULL); 2897 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2898 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2899 XML_SetExternalEntityRefHandlerArg(g_parser, NULL); 2900 g_handler_data = g_parser; 2901 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2902 == XML_STATUS_ERROR) 2903 xml_failure(g_parser); 2904 } 2905 END_TEST 2906 2907 /* Test the parsing of an empty string */ 2908 START_TEST(test_empty_parse) { 2909 const char *text = "<doc></doc>"; 2910 const char *partial = "<doc>"; 2911 2912 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) 2913 fail("Parsing empty string faulted"); 2914 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2915 fail("Parsing final empty string not faulted"); 2916 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS) 2917 fail("Parsing final empty string faulted for wrong reason"); 2918 2919 /* Now try with valid text before the empty end */ 2920 XML_ParserReset(g_parser, NULL); 2921 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2922 == XML_STATUS_ERROR) 2923 xml_failure(g_parser); 2924 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) 2925 fail("Parsing final empty string faulted"); 2926 2927 /* Now try with invalid text before the empty end */ 2928 XML_ParserReset(g_parser, NULL); 2929 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial), 2930 XML_FALSE) 2931 == XML_STATUS_ERROR) 2932 xml_failure(g_parser); 2933 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2934 fail("Parsing final incomplete empty string not faulted"); 2935 } 2936 END_TEST 
2937 2938 /* Test XML_Parse for len < 0 */ 2939 START_TEST(test_negative_len_parse) { 2940 const char *const doc = "<root/>"; 2941 for (int isFinal = 0; isFinal < 2; isFinal++) { 2942 set_subtest("isFinal=%d", isFinal); 2943 2944 XML_Parser parser = XML_ParserCreate(NULL); 2945 2946 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 2947 fail("There was not supposed to be any initial parse error."); 2948 2949 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal); 2950 2951 if (status != XML_STATUS_ERROR) 2952 fail("Negative len was expected to fail the parse but did not."); 2953 2954 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 2955 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 2956 2957 XML_ParserFree(parser); 2958 } 2959 } 2960 END_TEST 2961 2962 /* Test XML_ParseBuffer for len < 0 */ 2963 START_TEST(test_negative_len_parse_buffer) { 2964 const char *const doc = "<root/>"; 2965 for (int isFinal = 0; isFinal < 2; isFinal++) { 2966 set_subtest("isFinal=%d", isFinal); 2967 2968 XML_Parser parser = XML_ParserCreate(NULL); 2969 2970 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 2971 fail("There was not supposed to be any initial parse error."); 2972 2973 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc)); 2974 2975 if (buffer == NULL) 2976 fail("XML_GetBuffer failed."); 2977 2978 memcpy(buffer, doc, strlen(doc)); 2979 2980 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal); 2981 2982 if (status != XML_STATUS_ERROR) 2983 fail("Negative len was expected to fail the parse but did not."); 2984 2985 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 2986 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 2987 2988 XML_ParserFree(parser); 2989 } 2990 } 2991 END_TEST 2992 2993 /* Test odd corners of the XML_GetBuffer interface */ 2994 static enum XML_Status 2995 get_feature(enum XML_FeatureEnum feature_id, long *presult) { 2996 const XML_Feature *feature = XML_GetFeatureList(); 2997 
  /* No feature list at all: nothing to look up */
  if (feature == NULL)
    return XML_STATUS_ERROR;
  /* The list is terminated by an XML_FEATURE_END entry */
  for (; feature->feature != XML_FEATURE_END; feature++) {
    if (feature->feature == feature_id) {
      *presult = feature->value;
      return XML_STATUS_OK;
    }
  }
  /* Reached the terminator without a match */
  return XML_STATUS_ERROR;
}

/* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1) {
  const char *text = get_buffer_test_text;
  void *buffer;
  long context_bytes;

  /* Attempt to allocate a negative length buffer */
  if (XML_GetBuffer(g_parser, -12) != NULL)
    fail("Negative length buffer not failed");

  /* Now get a small buffer and extend it past valid length */
  buffer = XML_GetBuffer(g_parser, 1536);
  if (buffer == NULL)
    fail("1.5K buffer failed");
  assert(buffer != NULL);
  memcpy(buffer, text, strlen(text));
  if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
    fail("INT_MAX buffer not failed");

  /* Now try extending it a more reasonable but still too large
   * amount. The allocator in XML_GetBuffer() doubles the buffer
   * size until it exceeds the requested amount or INT_MAX. If it
   * exceeds INT_MAX, it rejects the request, so we want a request
   * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
   * with an extra byte just to ensure that the request is off any
   * boundary. The request will be inflated internally by
   * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
   * request.
3040 */ 3041 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK) 3042 context_bytes = 0; 3043 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL) 3044 fail("INT_MAX- buffer not failed"); 3045 3046 /* Now try extending it a carefully crafted amount */ 3047 if (XML_GetBuffer(g_parser, 1000) == NULL) 3048 fail("1000 buffer failed"); 3049 } 3050 END_TEST 3051 3052 /* Test more corners of the XML_GetBuffer interface */ 3053 START_TEST(test_get_buffer_2) { 3054 const char *text = get_buffer_test_text; 3055 void *buffer; 3056 3057 /* Now get a decent buffer */ 3058 buffer = XML_GetBuffer(g_parser, 1536); 3059 if (buffer == NULL) 3060 fail("1.5K buffer failed"); 3061 assert(buffer != NULL); 3062 memcpy(buffer, text, strlen(text)); 3063 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 3064 == XML_STATUS_ERROR) 3065 xml_failure(g_parser); 3066 3067 /* Extend it, to catch a different code path */ 3068 if (XML_GetBuffer(g_parser, 1024) == NULL) 3069 fail("1024 buffer failed"); 3070 } 3071 END_TEST 3072 3073 /* Test for signed integer overflow CVE-2022-23852 */ 3074 #if XML_CONTEXT_BYTES > 0 3075 START_TEST(test_get_buffer_3_overflow) { 3076 XML_Parser parser = XML_ParserCreate(NULL); 3077 assert(parser != NULL); 3078 3079 const char *const text = "\n"; 3080 const int expectedKeepValue = (int)strlen(text); 3081 3082 // After this call, variable "keep" in XML_GetBuffer will 3083 // have value expectedKeepValue 3084 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 3085 XML_FALSE /* isFinal */) 3086 == XML_STATUS_ERROR) 3087 xml_failure(parser); 3088 3089 assert(expectedKeepValue > 0); 3090 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) 3091 fail("enlarging buffer not failed"); 3092 3093 XML_ParserFree(parser); 3094 } 3095 END_TEST 3096 #endif // XML_CONTEXT_BYTES > 0 3097 3098 START_TEST(test_buffer_can_grow_to_max) { 3099 const char *const prefixes[] = { 3100 "", 3101 "<", 3102 "<x a='", 3103 
"<doc><x a='", 3104 "<document><x a='", 3105 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" 3106 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" 3107 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" 3108 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" 3109 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; 3110 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); 3111 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow 3112 #if defined(__MINGW32__) && ! defined(__MINGW64__) 3113 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB 3114 // Can we make a big allocation? 3115 for (int i = 1; i <= 2; i++) { 3116 void *const big = malloc(maxbuf); 3117 if (big != NULL) { 3118 free(big); 3119 break; 3120 } 3121 // The big allocation failed. Let's be a little lenient. 3122 maxbuf = maxbuf / 2; 3123 fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf); 3124 } 3125 #endif 3126 3127 for (int i = 0; i < num_prefixes; ++i) { 3128 set_subtest("\"%s\"", prefixes[i]); 3129 XML_Parser parser = XML_ParserCreate(NULL); 3130 #if XML_GE == 1 3131 assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1) 3132 == XML_TRUE); // i.e. deactivate 3133 #endif 3134 const int prefix_len = (int)strlen(prefixes[i]); 3135 const enum XML_Status s 3136 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); 3137 if (s != XML_STATUS_OK) 3138 xml_failure(parser); 3139 3140 // XML_CONTEXT_BYTES of the prefix may remain in the buffer; 3141 // subtracting the whole prefix is easiest, and close enough. 3142 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); 3143 // The limit should be consistent; no prefix should allow us to 3144 // reach above the max buffer size. 
3145 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); 3146 XML_ParserFree(parser); 3147 } 3148 } 3149 END_TEST 3150 3151 START_TEST(test_getbuffer_allocates_on_zero_len) { 3152 for (int first_len = 1; first_len >= 0; first_len--) { 3153 set_subtest("with len=%d first", first_len); 3154 XML_Parser parser = XML_ParserCreate(NULL); 3155 assert_true(parser != NULL); 3156 assert_true(XML_GetBuffer(parser, first_len) != NULL); 3157 assert_true(XML_GetBuffer(parser, 0) != NULL); 3158 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) 3159 xml_failure(parser); 3160 XML_ParserFree(parser); 3161 } 3162 } 3163 END_TEST 3164 3165 /* Test position information macros */ 3166 START_TEST(test_byte_info_at_end) { 3167 const char *text = "<doc></doc>"; 3168 3169 if (XML_GetCurrentByteIndex(g_parser) != -1 3170 || XML_GetCurrentByteCount(g_parser) != 0) 3171 fail("Byte index/count incorrect at start of parse"); 3172 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3173 == XML_STATUS_ERROR) 3174 xml_failure(g_parser); 3175 /* At end, the count will be zero and the index the end of string */ 3176 if (XML_GetCurrentByteCount(g_parser) != 0) 3177 fail("Terminal byte count incorrect"); 3178 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text)) 3179 fail("Terminal byte index incorrect"); 3180 } 3181 END_TEST 3182 3183 /* Test position information from errors */ 3184 #define PRE_ERROR_STR "<doc></" 3185 #define POST_ERROR_STR "wombat></doc>" 3186 START_TEST(test_byte_info_at_error) { 3187 const char *text = PRE_ERROR_STR POST_ERROR_STR; 3188 3189 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3190 == XML_STATUS_OK) 3191 fail("Syntax error not faulted"); 3192 if (XML_GetCurrentByteCount(g_parser) != 0) 3193 fail("Error byte count incorrect"); 3194 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR)) 3195 fail("Error byte index incorrect"); 3196 } 3197 END_TEST 3198 #undef PRE_ERROR_STR 3199 #undef 
POST_ERROR_STR 3200 3201 /* Test position information in handler */ 3202 #define START_ELEMENT "<e>" 3203 #define CDATA_TEXT "Hello" 3204 #define END_ELEMENT "</e>" 3205 START_TEST(test_byte_info_at_cdata) { 3206 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; 3207 int offset, size; 3208 ByteTestData data; 3209 3210 /* Check initial context is empty */ 3211 if (XML_GetInputContext(g_parser, &offset, &size) != NULL) 3212 fail("Unexpected context at start of parse"); 3213 3214 data.start_element_len = (int)strlen(START_ELEMENT); 3215 data.cdata_len = (int)strlen(CDATA_TEXT); 3216 data.total_string_len = (int)strlen(text); 3217 XML_SetCharacterDataHandler(g_parser, byte_character_handler); 3218 XML_SetUserData(g_parser, &data); 3219 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 3220 xml_failure(g_parser); 3221 } 3222 END_TEST 3223 #undef START_ELEMENT 3224 #undef CDATA_TEXT 3225 #undef END_ELEMENT 3226 3227 /* Test predefined entities are correctly recognised */ 3228 START_TEST(test_predefined_entities) { 3229 const char *text = "<doc><>&"'</doc>"; 3230 const XML_Char *expected = XCS("<doc><>&"'</doc>"); 3231 const XML_Char *result = XCS("<>&\"'"); 3232 CharData storage; 3233 3234 XML_SetDefaultHandler(g_parser, accumulate_characters); 3235 /* run_character_check uses XML_SetCharacterDataHandler(), which 3236 * unfortunately heads off a code path that we need to exercise. 
3237 */ 3238 CharData_Init(&storage); 3239 XML_SetUserData(g_parser, &storage); 3240 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3241 == XML_STATUS_ERROR) 3242 xml_failure(g_parser); 3243 /* The default handler doesn't translate the entities */ 3244 CharData_CheckXMLChars(&storage, expected); 3245 3246 /* Now try again and check the translation */ 3247 XML_ParserReset(g_parser, NULL); 3248 run_character_check(text, result); 3249 } 3250 END_TEST 3251 3252 /* Regression test that an invalid tag in an external parameter 3253 * reference in an external DTD is correctly faulted. 3254 * 3255 * Only a few specific tags are legal in DTDs ignoring comments and 3256 * processing instructions, all of which begin with an exclamation 3257 * mark. "<el/>" is not one of them, so the parser should raise an 3258 * error on encountering it. 3259 */ 3260 START_TEST(test_invalid_tag_in_dtd) { 3261 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3262 "<doc></doc>\n"; 3263 3264 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3265 XML_SetExternalEntityRefHandler(g_parser, external_entity_param); 3266 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3267 "Invalid tag IN DTD external param not rejected"); 3268 } 3269 END_TEST 3270 3271 /* Test entities not quite the predefined ones are not mis-recognised */ 3272 START_TEST(test_not_predefined_entities) { 3273 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>", 3274 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL}; 3275 int i = 0; 3276 3277 while (text[i] != NULL) { 3278 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, 3279 "Undefined entity not rejected"); 3280 XML_ParserReset(g_parser, NULL); 3281 i++; 3282 } 3283 } 3284 END_TEST 3285 3286 /* Test conditional inclusion (IGNORE) */ 3287 START_TEST(test_ignore_section) { 3288 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3289 "<doc><e>&entity;</e></doc>"; 3290 const XML_Char *expected 3291 = 
        XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
  CharData storage;

  CharData_Init(&storage);
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetUserData(g_parser, &storage);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
  /* The default handler collects its input into storage; the remaining
   * handlers are dummies. */
  XML_SetDefaultHandler(g_parser, accumulate_characters);
  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  XML_SetStartElementHandler(g_parser, dummy_start_element);
  XML_SetEndElementHandler(g_parser, dummy_end_element);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test conditional inclusion (IGNORE) with a 16-bit document whose
 * characters are stored low byte first.
 */
START_TEST(test_ignore_section_utf16) {
  /* An array rather than a pointer: the text is full of NUL bytes, so its
   * length has to come from sizeof rather than strlen. */
  const char text[] =
      /* <!DOCTYPE d SYSTEM 's'> */
      "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
      "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
      /* <d><e>&en;</e></d> */
      "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
  const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
  CharData storage;

  CharData_Init(&storage);
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetUserData(g_parser, &storage);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
  XML_SetDefaultHandler(g_parser, accumulate_characters);
  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  XML_SetStartElementHandler(g_parser, dummy_start_element);
  XML_SetEndElementHandler(g_parser, dummy_end_element);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text,
(int)sizeof(text) - 1, XML_TRUE) 3332 == XML_STATUS_ERROR) 3333 xml_failure(g_parser); 3334 CharData_CheckXMLChars(&storage, expected); 3335 } 3336 END_TEST 3337 3338 START_TEST(test_ignore_section_utf16_be) { 3339 const char text[] = 3340 /* <!DOCTYPE d SYSTEM 's'> */ 3341 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3342 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" 3343 /* <d><e>&en;</e></d> */ 3344 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; 3345 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3346 CharData storage; 3347 3348 CharData_Init(&storage); 3349 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3350 XML_SetUserData(g_parser, &storage); 3351 XML_SetExternalEntityRefHandler(g_parser, 3352 external_entity_load_ignore_utf16_be); 3353 XML_SetDefaultHandler(g_parser, accumulate_characters); 3354 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3355 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3356 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3357 XML_SetStartElementHandler(g_parser, dummy_start_element); 3358 XML_SetEndElementHandler(g_parser, dummy_end_element); 3359 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3360 == XML_STATUS_ERROR) 3361 xml_failure(g_parser); 3362 CharData_CheckXMLChars(&storage, expected); 3363 } 3364 END_TEST 3365 3366 /* Test mis-formatted conditional exclusion */ 3367 START_TEST(test_bad_ignore_section) { 3368 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3369 "<doc><e>&entity;</e></doc>"; 3370 ExtFaults faults[] 3371 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL, 3372 XML_ERROR_SYNTAX}, 3373 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL, 3374 XML_ERROR_INVALID_TOKEN}, 3375 {/* FIrst two bytes of a three-byte char */ 3376 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL, 3377 XML_ERROR_PARTIAL_CHAR}, 3378 {NULL, NULL, NULL, 
XML_ERROR_NONE}}; 3379 ExtFaults *fault; 3380 3381 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 3382 set_subtest("%s", fault->parse_text); 3383 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3384 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3385 XML_SetUserData(g_parser, fault); 3386 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3387 "Incomplete IGNORE section not failed"); 3388 XML_ParserReset(g_parser, NULL); 3389 } 3390 } 3391 END_TEST 3392 3393 struct bom_testdata { 3394 const char *external; 3395 int split; 3396 XML_Bool nested_callback_happened; 3397 }; 3398 3399 static int XMLCALL 3400 external_bom_checker(XML_Parser parser, const XML_Char *context, 3401 const XML_Char *base, const XML_Char *systemId, 3402 const XML_Char *publicId) { 3403 const char *text; 3404 UNUSED_P(base); 3405 UNUSED_P(systemId); 3406 UNUSED_P(publicId); 3407 3408 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); 3409 if (ext_parser == NULL) 3410 fail("Could not create external entity parser"); 3411 3412 if (! xcstrcmp(systemId, XCS("004-2.ent"))) { 3413 struct bom_testdata *const testdata 3414 = (struct bom_testdata *)XML_GetUserData(parser); 3415 const char *const external = testdata->external; 3416 const int split = testdata->split; 3417 testdata->nested_callback_happened = XML_TRUE; 3418 3419 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) 3420 != XML_STATUS_OK) { 3421 xml_failure(ext_parser); 3422 } 3423 text = external + split; // the parse below will continue where we left off. 3424 } else if (! 
xcstrcmp(systemId, XCS("004-1.ent"))) { 3425 text = "<!ELEMENT doc EMPTY>\n" 3426 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" 3427 "<!ENTITY % e2 '%e1;'>\n"; 3428 } else { 3429 fail("unknown systemId"); 3430 } 3431 3432 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) 3433 != XML_STATUS_OK) 3434 xml_failure(ext_parser); 3435 3436 XML_ParserFree(ext_parser); 3437 return XML_STATUS_OK; 3438 } 3439 3440 /* regression test: BOM should be consumed when followed by a partial token. */ 3441 START_TEST(test_external_bom_consumed) { 3442 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3443 "<doc></doc>\n"; 3444 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; 3445 const int len = (int)strlen(external); 3446 for (int split = 0; split <= len; ++split) { 3447 set_subtest("split at byte %d", split); 3448 3449 struct bom_testdata testdata; 3450 testdata.external = external; 3451 testdata.split = split; 3452 testdata.nested_callback_happened = XML_FALSE; 3453 3454 XML_Parser parser = XML_ParserCreate(NULL); 3455 if (parser == NULL) { 3456 fail("Couldn't create parser"); 3457 } 3458 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3459 XML_SetExternalEntityRefHandler(parser, external_bom_checker); 3460 XML_SetUserData(parser, &testdata); 3461 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 3462 == XML_STATUS_ERROR) 3463 xml_failure(parser); 3464 if (! 
testdata.nested_callback_happened) { 3465 fail("ref handler not called"); 3466 } 3467 XML_ParserFree(parser); 3468 } 3469 } 3470 END_TEST 3471 3472 /* Test recursive parsing */ 3473 START_TEST(test_external_entity_values) { 3474 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3475 "<doc></doc>\n"; 3476 ExtFaults data_004_2[] = { 3477 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE}, 3478 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL, 3479 XML_ERROR_INVALID_TOKEN}, 3480 {"'wombat", "Unterminated string not faulted", NULL, 3481 XML_ERROR_UNCLOSED_TOKEN}, 3482 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL, 3483 XML_ERROR_PARTIAL_CHAR}, 3484 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE}, 3485 {"<?xml?>", "Malformed XML declaration not faulted", NULL, 3486 XML_ERROR_XML_DECL}, 3487 {/* UTF-8 BOM */ 3488 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, 3489 XML_ERROR_NONE}, 3490 {"<?xml version='1.0' encoding='utf-8'?>\n$", 3491 "Invalid token after text declaration not faulted", NULL, 3492 XML_ERROR_INVALID_TOKEN}, 3493 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat", 3494 "Unterminated string after text decl not faulted", NULL, 3495 XML_ERROR_UNCLOSED_TOKEN}, 3496 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82", 3497 "Partial UTF-8 character after text decl not faulted", NULL, 3498 XML_ERROR_PARTIAL_CHAR}, 3499 {"%e1;", "Recursive parameter entity not faulted", NULL, 3500 XML_ERROR_RECURSIVE_ENTITY_REF}, 3501 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3502 int i; 3503 3504 for (i = 0; data_004_2[i].parse_text != NULL; i++) { 3505 set_subtest("%s", data_004_2[i].parse_text); 3506 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3507 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); 3508 XML_SetUserData(g_parser, &data_004_2[i]); 3509 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3510 == XML_STATUS_ERROR) 3511 
xml_failure(g_parser); 3512 XML_ParserReset(g_parser, NULL); 3513 } 3514 } 3515 END_TEST 3516 3517 /* Test the recursive parse interacts with a not standalone handler */ 3518 START_TEST(test_ext_entity_not_standalone) { 3519 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3520 "<doc></doc>"; 3521 3522 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3523 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone); 3524 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3525 "Standalone rejection not caught"); 3526 } 3527 END_TEST 3528 3529 START_TEST(test_ext_entity_value_abort) { 3530 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3531 "<doc></doc>\n"; 3532 3533 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3534 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter); 3535 g_resumable = XML_FALSE; 3536 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3537 == XML_STATUS_ERROR) 3538 xml_failure(g_parser); 3539 } 3540 END_TEST 3541 3542 START_TEST(test_bad_public_doctype) { 3543 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 3544 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n" 3545 "<doc></doc>"; 3546 3547 /* Setting a handler provokes a particular code path */ 3548 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, 3549 dummy_end_doctype_handler); 3550 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); 3551 } 3552 END_TEST 3553 3554 /* Test based on ibm/valid/P32/ibm32v04.xml */ 3555 START_TEST(test_attribute_enum_value) { 3556 const char *text = "<?xml version='1.0' standalone='no'?>\n" 3557 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" 3558 "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; 3559 ExtTest dtd_data 3560 = {"<!ELEMENT animal (#PCDATA|a)*>\n" 3561 "<!ELEMENT a EMPTY>\n" 3562 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>", 3563 NULL, NULL}; 3564 const XML_Char *expected = XCS("This 
is a \n \n\nyellow tiger"); 3565 3566 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3567 XML_SetUserData(g_parser, &dtd_data); 3568 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3569 /* An attribute list handler provokes a different code path */ 3570 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 3571 run_ext_character_check(text, &dtd_data, expected); 3572 } 3573 END_TEST 3574 3575 /* Slightly bizarrely, the library seems to silently ignore entity 3576 * definitions for predefined entities, even when they are wrong. The 3577 * language of the XML 1.0 spec is somewhat unhelpful as to what ought 3578 * to happen, so this is currently treated as acceptable. 3579 */ 3580 START_TEST(test_predefined_entity_redefinition) { 3581 const char *text = "<!DOCTYPE doc [\n" 3582 "<!ENTITY apos 'foo'>\n" 3583 "]>\n" 3584 "<doc>'</doc>"; 3585 run_character_check(text, XCS("'")); 3586 } 3587 END_TEST 3588 3589 /* Test that the parser stops processing the DTD after an unresolved 3590 * parameter entity is encountered. 
3591 */ 3592 START_TEST(test_dtd_stop_processing) { 3593 const char *text = "<!DOCTYPE doc [\n" 3594 "%foo;\n" 3595 "<!ENTITY bar 'bas'>\n" 3596 "]><doc/>"; 3597 3598 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 3599 init_dummy_handlers(); 3600 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3601 == XML_STATUS_ERROR) 3602 xml_failure(g_parser); 3603 if (get_dummy_handler_flags() != 0) 3604 fail("DTD processing still going after undefined PE"); 3605 } 3606 END_TEST 3607 3608 /* Test public notations with no system ID */ 3609 START_TEST(test_public_notation_no_sysid) { 3610 const char *text = "<!DOCTYPE doc [\n" 3611 "<!NOTATION note PUBLIC 'foo'>\n" 3612 "<!ELEMENT doc EMPTY>\n" 3613 "]>\n<doc/>"; 3614 3615 init_dummy_handlers(); 3616 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 3617 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3618 == XML_STATUS_ERROR) 3619 xml_failure(g_parser); 3620 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG) 3621 fail("Notation declaration handler not called"); 3622 } 3623 END_TEST 3624 3625 START_TEST(test_nested_groups) { 3626 const char *text 3627 = "<!DOCTYPE doc [\n" 3628 "<!ELEMENT doc " 3629 /* Sixteen elements per line */ 3630 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," 3631 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" 
3632 "))))))))))))))))))))))))))))))))>\n" 3633 "<!ELEMENT e EMPTY>" 3634 "]>\n" 3635 "<doc><e/></doc>"; 3636 CharData storage; 3637 3638 CharData_Init(&storage); 3639 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3640 XML_SetStartElementHandler(g_parser, record_element_start_handler); 3641 XML_SetUserData(g_parser, &storage); 3642 init_dummy_handlers(); 3643 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3644 == XML_STATUS_ERROR) 3645 xml_failure(g_parser); 3646 CharData_CheckXMLChars(&storage, XCS("doce")); 3647 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3648 fail("Element handler not fired"); 3649 } 3650 END_TEST 3651 3652 START_TEST(test_group_choice) { 3653 const char *text = "<!DOCTYPE doc [\n" 3654 "<!ELEMENT doc (a|b|c)+>\n" 3655 "<!ELEMENT a EMPTY>\n" 3656 "<!ELEMENT b (#PCDATA)>\n" 3657 "<!ELEMENT c ANY>\n" 3658 "]>\n" 3659 "<doc>\n" 3660 "<a/>\n" 3661 "<b attr='foo'>This is a foo</b>\n" 3662 "<c></c>\n" 3663 "</doc>\n"; 3664 3665 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3666 init_dummy_handlers(); 3667 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3668 == XML_STATUS_ERROR) 3669 xml_failure(g_parser); 3670 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3671 fail("Element handler flag not raised"); 3672 } 3673 END_TEST 3674 3675 START_TEST(test_standalone_parameter_entity) { 3676 const char *text = "<?xml version='1.0' standalone='yes'?>\n" 3677 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n" 3678 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n" 3679 "%entity;\n" 3680 "]>\n" 3681 "<doc></doc>"; 3682 char dtd_data[] = "<!ENTITY % e1 'foo'>\n"; 3683 3684 XML_SetUserData(g_parser, dtd_data); 3685 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3686 XML_SetExternalEntityRefHandler(g_parser, external_entity_public); 3687 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3688 == 
XML_STATUS_ERROR) 3689 xml_failure(g_parser); 3690 } 3691 END_TEST 3692 3693 /* Test skipping of parameter entity in an external DTD */ 3694 /* Derived from ibm/invalid/P69/ibm69i01.xml */ 3695 START_TEST(test_skipped_parameter_entity) { 3696 const char *text = "<?xml version='1.0'?>\n" 3697 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3698 "<!ELEMENT root (#PCDATA|a)* >\n" 3699 "]>\n" 3700 "<root></root>"; 3701 ExtTest dtd_data = {"%pe2;", NULL, NULL}; 3702 3703 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3704 XML_SetUserData(g_parser, &dtd_data); 3705 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3706 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler); 3707 init_dummy_handlers(); 3708 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3709 == XML_STATUS_ERROR) 3710 xml_failure(g_parser); 3711 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG) 3712 fail("Skip handler not executed"); 3713 } 3714 END_TEST 3715 3716 /* Test recursive parameter entity definition rejected in external DTD */ 3717 START_TEST(test_recursive_external_parameter_entity) { 3718 const char *text = "<?xml version='1.0'?>\n" 3719 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3720 "<!ELEMENT root (#PCDATA|a)* >\n" 3721 "]>\n" 3722 "<root></root>"; 3723 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;", 3724 "Recursive external parameter entity not faulted", NULL, 3725 XML_ERROR_RECURSIVE_ENTITY_REF}; 3726 3727 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3728 XML_SetUserData(g_parser, &dtd_data); 3729 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3730 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3731 "Recursive external parameter not spotted"); 3732 } 3733 END_TEST 3734 3735 /* Test undefined parameter entity in external entity handler */ 3736 START_TEST(test_undefined_ext_entity_in_external_dtd) { 3737 const char *text = 
"<!DOCTYPE doc SYSTEM 'foo'>\n" 3738 "<doc></doc>\n"; 3739 3740 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3741 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3742 XML_SetUserData(g_parser, NULL); 3743 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3744 == XML_STATUS_ERROR) 3745 xml_failure(g_parser); 3746 3747 /* Now repeat without the external entity ref handler invoking 3748 * another copy of itself. 3749 */ 3750 XML_ParserReset(g_parser, NULL); 3751 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3752 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3753 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */ 3754 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3755 == XML_STATUS_ERROR) 3756 xml_failure(g_parser); 3757 } 3758 END_TEST 3759 3760 /* Test suspending the parse on receiving an XML declaration works */ 3761 START_TEST(test_suspend_xdecl) { 3762 const char *text = long_character_data_text; 3763 3764 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler); 3765 XML_SetUserData(g_parser, g_parser); 3766 g_resumable = XML_TRUE; 3767 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 3768 // we won't know exactly how much input we actually managed to give Expat. 
3769 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3770 != XML_STATUS_SUSPENDED) 3771 xml_failure(g_parser); 3772 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 3773 xml_failure(g_parser); 3774 /* Attempt to start a new parse while suspended */ 3775 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3776 != XML_STATUS_ERROR) 3777 fail("Attempt to parse while suspended not faulted"); 3778 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 3779 fail("Suspended parse not faulted with correct error"); 3780 } 3781 END_TEST 3782 3783 /* Test aborting the parse in an epilog works */ 3784 START_TEST(test_abort_epilog) { 3785 const char *text = "<doc></doc>\n\r\n"; 3786 XML_Char trigger_char = XCS('\r'); 3787 3788 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3789 XML_SetUserData(g_parser, &trigger_char); 3790 g_resumable = XML_FALSE; 3791 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3792 != XML_STATUS_ERROR) 3793 fail("Abort not triggered"); 3794 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 3795 xml_failure(g_parser); 3796 } 3797 END_TEST 3798 3799 /* Test a different code path for abort in the epilog */ 3800 START_TEST(test_abort_epilog_2) { 3801 const char *text = "<doc></doc>\n"; 3802 XML_Char trigger_char = XCS('\n'); 3803 3804 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3805 XML_SetUserData(g_parser, &trigger_char); 3806 g_resumable = XML_FALSE; 3807 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); 3808 } 3809 END_TEST 3810 3811 /* Test suspension from the epilog */ 3812 START_TEST(test_suspend_epilog) { 3813 const char *text = "<doc></doc>\n"; 3814 XML_Char trigger_char = XCS('\n'); 3815 3816 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3817 XML_SetUserData(g_parser, &trigger_char); 3818 g_resumable = XML_TRUE; 3819 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3820 != 
XML_STATUS_SUSPENDED) 3821 xml_failure(g_parser); 3822 } 3823 END_TEST 3824 3825 START_TEST(test_suspend_in_sole_empty_tag) { 3826 const char *text = "<doc/>"; 3827 enum XML_Status rc; 3828 3829 XML_SetEndElementHandler(g_parser, suspending_end_handler); 3830 XML_SetUserData(g_parser, g_parser); 3831 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 3832 if (rc == XML_STATUS_ERROR) 3833 xml_failure(g_parser); 3834 else if (rc != XML_STATUS_SUSPENDED) 3835 fail("Suspend not triggered"); 3836 rc = XML_ResumeParser(g_parser); 3837 if (rc == XML_STATUS_ERROR) 3838 xml_failure(g_parser); 3839 else if (rc != XML_STATUS_OK) 3840 fail("Resume failed"); 3841 } 3842 END_TEST 3843 3844 START_TEST(test_unfinished_epilog) { 3845 const char *text = "<doc></doc><"; 3846 3847 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, 3848 "Incomplete epilog entry not faulted"); 3849 } 3850 END_TEST 3851 3852 START_TEST(test_partial_char_in_epilog) { 3853 const char *text = "<doc></doc>\xe2\x82"; 3854 3855 /* First check that no fault is raised if the parse is not finished */ 3856 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3857 == XML_STATUS_ERROR) 3858 xml_failure(g_parser); 3859 /* Now check that it is faulted once we finish */ 3860 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR) 3861 fail("Partial character in epilog not faulted"); 3862 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR) 3863 xml_failure(g_parser); 3864 } 3865 END_TEST 3866 3867 /* Test resuming a parse suspended in entity substitution */ 3868 START_TEST(test_suspend_resume_internal_entity) { 3869 const char *text 3870 = "<!DOCTYPE doc [\n" 3871 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n" 3872 "]>\n" 3873 "<doc>&foo;</doc>\n"; 3874 const XML_Char *expected1 = XCS("Hi"); 3875 const XML_Char *expected2 = XCS("HiHo"); 3876 CharData storage; 3877 3878 CharData_Init(&storage); 3879 XML_SetStartElementHandler(g_parser, 
start_element_suspender); 3880 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 3881 XML_SetUserData(g_parser, &storage); 3882 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 3883 // we won't know exactly how much input we actually managed to give Expat. 3884 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3885 != XML_STATUS_SUSPENDED) 3886 xml_failure(g_parser); 3887 CharData_CheckXMLChars(&storage, XCS("")); 3888 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 3889 xml_failure(g_parser); 3890 CharData_CheckXMLChars(&storage, expected1); 3891 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 3892 xml_failure(g_parser); 3893 CharData_CheckXMLChars(&storage, expected2); 3894 } 3895 END_TEST 3896 3897 START_TEST(test_suspend_resume_internal_entity_issue_629) { 3898 const char *const text 3899 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n" 3900 "<" 3901 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3902 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3903 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3904 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3905 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3906 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3907 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3908 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3909 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3910 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3911 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3912 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3913 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3914 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3915 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3916 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3917 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3918 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3919 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3920 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3921 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3922 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3923 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3924 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3925 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3926 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3927 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3928 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3929 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3930 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3931 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3932 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3933 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3934 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3935 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3936 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3937 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3938 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3939 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3940 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3941 "/>" 3942 "</b></a>"; 3943 const size_t firstChunkSizeBytes = 54; 3944 3945 XML_Parser parser = XML_ParserCreate(NULL); 3946 XML_SetUserData(parser, parser); 3947 XML_SetCommentHandler(parser, suspending_comment_handler); 3948 3949 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE) 3950 != XML_STATUS_SUSPENDED) 3951 xml_failure(parser); 3952 if (XML_ResumeParser(parser) != XML_STATUS_OK) 3953 xml_failure(parser); 3954 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes, 3955 (int)(strlen(text) - firstChunkSizeBytes), 3956 XML_TRUE) 3957 != XML_STATUS_OK) 3958 xml_failure(parser); 3959 XML_ParserFree(parser); 3960 } 3961 END_TEST 3962 3963 /* Test syntax error is caught at parse resumption 
*/ 3964 START_TEST(test_resume_entity_with_syntax_error) { 3965 if (g_chunkSize != 0) { 3966 // this test does not use SINGLE_BYTES, because of suspension 3967 return; 3968 } 3969 3970 const char *text = "<!DOCTYPE doc [\n" 3971 "<!ENTITY foo '<suspend>Hi</wombat>'>\n" 3972 "]>\n" 3973 "<doc>&foo;</doc>\n"; 3974 3975 XML_SetStartElementHandler(g_parser, start_element_suspender); 3976 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 3977 // we won't know exactly how much input we actually managed to give Expat. 3978 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3979 != XML_STATUS_SUSPENDED) 3980 xml_failure(g_parser); 3981 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) 3982 fail("Syntax error in entity not faulted"); 3983 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH) 3984 xml_failure(g_parser); 3985 } 3986 END_TEST 3987 3988 /* Test suspending and resuming in a parameter entity substitution */ 3989 START_TEST(test_suspend_resume_parameter_entity) { 3990 const char *text = "<!DOCTYPE doc [\n" 3991 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n" 3992 "%foo;\n" 3993 "]>\n" 3994 "<doc>Hello, world</doc>"; 3995 const XML_Char *expected = XCS("Hello, world"); 3996 CharData storage; 3997 3998 CharData_Init(&storage); 3999 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4000 XML_SetElementDeclHandler(g_parser, element_decl_suspender); 4001 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 4002 XML_SetUserData(g_parser, &storage); 4003 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 4004 != XML_STATUS_SUSPENDED) 4005 xml_failure(g_parser); 4006 CharData_CheckXMLChars(&storage, XCS("")); 4007 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 4008 xml_failure(g_parser); 4009 CharData_CheckXMLChars(&storage, expected); 4010 } 4011 END_TEST 4012 4013 /* Test attempting to use parser after an error is faulted */ 4014 START_TEST(test_restart_on_error) { 4015 const char *text = 
"<$doc><doc></doc>"; 4016 4017 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4018 != XML_STATUS_ERROR) 4019 fail("Invalid tag name not faulted"); 4020 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 4021 xml_failure(g_parser); 4022 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 4023 fail("Restarting invalid parse not faulted"); 4024 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 4025 xml_failure(g_parser); 4026 } 4027 END_TEST 4028 4029 /* Test that angle brackets in an attribute default value are faulted */ 4030 START_TEST(test_reject_lt_in_attribute_value) { 4031 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n" 4032 "<doc></doc>"; 4033 4034 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4035 "Bad attribute default not faulted"); 4036 } 4037 END_TEST 4038 4039 START_TEST(test_reject_unfinished_param_in_att_value) { 4040 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n" 4041 "<doc></doc>"; 4042 4043 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4044 "Bad attribute default not faulted"); 4045 } 4046 END_TEST 4047 4048 START_TEST(test_trailing_cr_in_att_value) { 4049 const char *text = "<doc a='value\r'/>"; 4050 4051 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4052 == XML_STATUS_ERROR) 4053 xml_failure(g_parser); 4054 } 4055 END_TEST 4056 4057 /* Try parsing a general entity within a parameter entity in a 4058 * standalone internal DTD. Covers a corner case in the parser. 
4059 */ 4060 START_TEST(test_standalone_internal_entity) { 4061 const char *text = "<?xml version='1.0' standalone='yes' ?>\n" 4062 "<!DOCTYPE doc [\n" 4063 " <!ELEMENT doc (#PCDATA)>\n" 4064 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n" 4065 " <!ENTITY ge 'AttDefaultValue'>\n" 4066 " %pe;\n" 4067 "]>\n" 4068 "<doc att2='any'/>"; 4069 4070 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4071 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4072 == XML_STATUS_ERROR) 4073 xml_failure(g_parser); 4074 } 4075 END_TEST 4076 4077 /* Test that a reference to an unknown external entity is skipped */ 4078 START_TEST(test_skipped_external_entity) { 4079 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 4080 "<doc></doc>\n"; 4081 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n" 4082 "<!ENTITY % e2 '%e1;'>\n", 4083 NULL, NULL}; 4084 4085 XML_SetUserData(g_parser, &test_data); 4086 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4087 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4088 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4089 == XML_STATUS_ERROR) 4090 xml_failure(g_parser); 4091 } 4092 END_TEST 4093 4094 /* Test a different form of unknown external entity */ 4095 START_TEST(test_skipped_null_loaded_ext_entity) { 4096 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 4097 "<doc />"; 4098 ExtHdlrData test_data 4099 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 4100 "<!ENTITY % pe2 '%pe1;'>\n" 4101 "%pe2;\n", 4102 external_entity_null_loader, NULL}; 4103 4104 XML_SetUserData(g_parser, &test_data); 4105 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4106 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 4107 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4108 == XML_STATUS_ERROR) 4109 xml_failure(g_parser); 4110 } 4111 END_TEST 
4112 4113 START_TEST(test_skipped_unloaded_ext_entity) { 4114 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 4115 "<doc />"; 4116 ExtHdlrData test_data 4117 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 4118 "<!ENTITY % pe2 '%pe1;'>\n" 4119 "%pe2;\n", 4120 NULL, NULL}; 4121 4122 XML_SetUserData(g_parser, &test_data); 4123 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4124 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 4125 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4126 == XML_STATUS_ERROR) 4127 xml_failure(g_parser); 4128 } 4129 END_TEST 4130 4131 /* Test that a parameter entity value ending with a carriage return 4132 * has it translated internally into a newline. 4133 */ 4134 START_TEST(test_param_entity_with_trailing_cr) { 4135 #define PARAM_ENTITY_NAME "pe" 4136 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">" 4137 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 4138 "<doc/>"; 4139 ExtTest test_data 4140 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n" 4141 "%" PARAM_ENTITY_NAME ";\n", 4142 NULL, NULL}; 4143 4144 XML_SetUserData(g_parser, &test_data); 4145 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4146 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4147 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler); 4148 param_entity_match_init(XCS(PARAM_ENTITY_NAME), 4149 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n")); 4150 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4151 == XML_STATUS_ERROR) 4152 xml_failure(g_parser); 4153 int entity_match_flag = get_param_entity_match_flag(); 4154 if (entity_match_flag == ENTITY_MATCH_FAIL) 4155 fail("Parameter entity CR->NEWLINE conversion failed"); 4156 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) 4157 fail("Parameter entity not parsed"); 4158 } 4159 #undef 
PARAM_ENTITY_NAME 4160 #undef PARAM_ENTITY_CORE_VALUE 4161 END_TEST 4162 4163 START_TEST(test_invalid_character_entity) { 4164 const char *text = "<!DOCTYPE doc [\n" 4165 " <!ENTITY entity '�'>\n" 4166 "]>\n" 4167 "<doc>&entity;</doc>"; 4168 4169 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4170 "Out of range character reference not faulted"); 4171 } 4172 END_TEST 4173 4174 START_TEST(test_invalid_character_entity_2) { 4175 const char *text = "<!DOCTYPE doc [\n" 4176 " <!ENTITY entity '&#xg0;'>\n" 4177 "]>\n" 4178 "<doc>&entity;</doc>"; 4179 4180 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4181 "Out of range character reference not faulted"); 4182 } 4183 END_TEST 4184 4185 START_TEST(test_invalid_character_entity_3) { 4186 const char text[] = 4187 /* <!DOCTYPE doc [\n */ 4188 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 4189 /* U+0E04 = KHO KHWAI 4190 * U+0E08 = CHO CHAN */ 4191 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */ 4192 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " 4193 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" 4194 /* ]>\n */ 4195 "\0]\0>\0\n" 4196 /* <doc>&entity;</doc> */ 4197 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; 4198 4199 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4200 != XML_STATUS_ERROR) 4201 fail("Invalid start of entity name not faulted"); 4202 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY) 4203 xml_failure(g_parser); 4204 } 4205 END_TEST 4206 4207 START_TEST(test_invalid_character_entity_4) { 4208 const char *text = "<!DOCTYPE doc [\n" 4209 " <!ENTITY entity '�'>\n" /* = � */ 4210 "]>\n" 4211 "<doc>&entity;</doc>"; 4212 4213 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4214 "Out of range character reference not faulted"); 4215 } 4216 END_TEST 4217 4218 /* Test that processing instructions are picked up by a default handler */ 4219 START_TEST(test_pi_handled_in_default) { 4220 const char *text = "<?test processing instruction?>\n<doc/>"; 4221 const XML_Char *expected = 
XCS("<?test processing instruction?>\n<doc/>"); 4222 CharData storage; 4223 4224 CharData_Init(&storage); 4225 XML_SetDefaultHandler(g_parser, accumulate_characters); 4226 XML_SetUserData(g_parser, &storage); 4227 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4228 == XML_STATUS_ERROR) 4229 xml_failure(g_parser); 4230 CharData_CheckXMLChars(&storage, expected); 4231 } 4232 END_TEST 4233 4234 /* Test that comments are picked up by a default handler */ 4235 START_TEST(test_comment_handled_in_default) { 4236 const char *text = "<!-- This is a comment -->\n<doc/>"; 4237 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>"); 4238 CharData storage; 4239 4240 CharData_Init(&storage); 4241 XML_SetDefaultHandler(g_parser, accumulate_characters); 4242 XML_SetUserData(g_parser, &storage); 4243 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4244 == XML_STATUS_ERROR) 4245 xml_failure(g_parser); 4246 CharData_CheckXMLChars(&storage, expected); 4247 } 4248 END_TEST 4249 4250 /* Test PIs that look almost but not quite like XML declarations */ 4251 START_TEST(test_pi_yml) { 4252 const char *text = "<?yml something like data?><doc/>"; 4253 const XML_Char *expected = XCS("yml: something like data\n"); 4254 CharData storage; 4255 4256 CharData_Init(&storage); 4257 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4258 XML_SetUserData(g_parser, &storage); 4259 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4260 == XML_STATUS_ERROR) 4261 xml_failure(g_parser); 4262 CharData_CheckXMLChars(&storage, expected); 4263 } 4264 END_TEST 4265 4266 START_TEST(test_pi_xnl) { 4267 const char *text = "<?xnl nothing like data?><doc/>"; 4268 const XML_Char *expected = XCS("xnl: nothing like data\n"); 4269 CharData storage; 4270 4271 CharData_Init(&storage); 4272 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4273 XML_SetUserData(g_parser, &storage); 4274 if 
(_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4275 == XML_STATUS_ERROR) 4276 xml_failure(g_parser); 4277 CharData_CheckXMLChars(&storage, expected); 4278 } 4279 END_TEST 4280 4281 START_TEST(test_pi_xmm) { 4282 const char *text = "<?xmm everything like data?><doc/>"; 4283 const XML_Char *expected = XCS("xmm: everything like data\n"); 4284 CharData storage; 4285 4286 CharData_Init(&storage); 4287 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4288 XML_SetUserData(g_parser, &storage); 4289 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4290 == XML_STATUS_ERROR) 4291 xml_failure(g_parser); 4292 CharData_CheckXMLChars(&storage, expected); 4293 } 4294 END_TEST 4295 4296 START_TEST(test_utf16_pi) { 4297 const char text[] = 4298 /* <?{KHO KHWAI}{CHO CHAN}?> 4299 * where {KHO KHWAI} = U+0E04 4300 * and {CHO CHAN} = U+0E08 4301 */ 4302 "<\0?\0\x04\x0e\x08\x0e?\0>\0" 4303 /* <q/> */ 4304 "<\0q\0/\0>\0"; 4305 #ifdef XML_UNICODE 4306 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4307 #else 4308 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4309 #endif 4310 CharData storage; 4311 4312 CharData_Init(&storage); 4313 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4314 XML_SetUserData(g_parser, &storage); 4315 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4316 == XML_STATUS_ERROR) 4317 xml_failure(g_parser); 4318 CharData_CheckXMLChars(&storage, expected); 4319 } 4320 END_TEST 4321 4322 START_TEST(test_utf16_be_pi) { 4323 const char text[] = 4324 /* <?{KHO KHWAI}{CHO CHAN}?> 4325 * where {KHO KHWAI} = U+0E04 4326 * and {CHO CHAN} = U+0E08 4327 */ 4328 "\0<\0?\x0e\x04\x0e\x08\0?\0>" 4329 /* <q/> */ 4330 "\0<\0q\0/\0>"; 4331 #ifdef XML_UNICODE 4332 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4333 #else 4334 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4335 #endif 4336 CharData storage; 4337 4338 
CharData_Init(&storage); 4339 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4340 XML_SetUserData(g_parser, &storage); 4341 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4342 == XML_STATUS_ERROR) 4343 xml_failure(g_parser); 4344 CharData_CheckXMLChars(&storage, expected); 4345 } 4346 END_TEST 4347 4348 /* Test that comments can be picked up and translated */ 4349 START_TEST(test_utf16_be_comment) { 4350 const char text[] = 4351 /* <!-- Comment A --> */ 4352 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" 4353 /* <doc/> */ 4354 "\0<\0d\0o\0c\0/\0>"; 4355 const XML_Char *expected = XCS(" Comment A "); 4356 CharData storage; 4357 4358 CharData_Init(&storage); 4359 XML_SetCommentHandler(g_parser, accumulate_comment); 4360 XML_SetUserData(g_parser, &storage); 4361 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4362 == XML_STATUS_ERROR) 4363 xml_failure(g_parser); 4364 CharData_CheckXMLChars(&storage, expected); 4365 } 4366 END_TEST 4367 4368 START_TEST(test_utf16_le_comment) { 4369 const char text[] = 4370 /* <!-- Comment B --> */ 4371 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" 4372 /* <doc/> */ 4373 "<\0d\0o\0c\0/\0>\0"; 4374 const XML_Char *expected = XCS(" Comment B "); 4375 CharData storage; 4376 4377 CharData_Init(&storage); 4378 XML_SetCommentHandler(g_parser, accumulate_comment); 4379 XML_SetUserData(g_parser, &storage); 4380 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4381 == XML_STATUS_ERROR) 4382 xml_failure(g_parser); 4383 CharData_CheckXMLChars(&storage, expected); 4384 } 4385 END_TEST 4386 4387 /* Test that the unknown encoding handler with map entries that expect 4388 * conversion but no conversion function is faulted 4389 */ 4390 START_TEST(test_missing_encoding_conversion_fn) { 4391 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n" 4392 "<doc>\x81</doc>"; 4393 4394 
XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4395 /* MiscEncodingHandler sets up an encoding with every top-bit-set 4396 * character introducing a two-byte sequence. For this, it 4397 * requires a convert function. The above function call doesn't 4398 * pass one through, so when BadEncodingHandler actually gets 4399 * called it should supply an invalid encoding. 4400 */ 4401 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4402 "Encoding with missing convert() not faulted"); 4403 } 4404 END_TEST 4405 4406 START_TEST(test_failing_encoding_conversion_fn) { 4407 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n" 4408 "<doc>\x81</doc>"; 4409 4410 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4411 /* BadEncodingHandler sets up an encoding with every top-bit-set 4412 * character introducing a two-byte sequence. For this, it 4413 * requires a convert function. The above function call passes 4414 * one that insists all possible sequences are invalid anyway. 
4415 */ 4416 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4417 "Encoding with failing convert() not faulted"); 4418 } 4419 END_TEST 4420 4421 /* Test unknown encoding conversions */ 4422 START_TEST(test_unknown_encoding_success) { 4423 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4424 /* Equivalent to <eoc>Hello, world</eoc> */ 4425 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>"; 4426 4427 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4428 run_character_check(text, XCS("Hello, world")); 4429 } 4430 END_TEST 4431 4432 /* Test bad name character in unknown encoding */ 4433 START_TEST(test_unknown_encoding_bad_name) { 4434 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4435 "<\xff\x64oc>Hello, world</\xff\x64oc>"; 4436 4437 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4438 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4439 "Bad name start in unknown encoding not faulted"); 4440 } 4441 END_TEST 4442 4443 /* Test bad mid-name character in unknown encoding */ 4444 START_TEST(test_unknown_encoding_bad_name_2) { 4445 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4446 "<d\xffoc>Hello, world</d\xffoc>"; 4447 4448 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4449 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4450 "Bad name in unknown encoding not faulted"); 4451 } 4452 END_TEST 4453 4454 /* Test element name that is long enough to fill the conversion buffer 4455 * in an unknown encoding, finishing with an encoded character. 
4456 */ 4457 START_TEST(test_unknown_encoding_long_name_1) { 4458 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4459 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>" 4460 "Hi" 4461 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"; 4462 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4463 CharData storage; 4464 4465 CharData_Init(&storage); 4466 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4467 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4468 XML_SetUserData(g_parser, &storage); 4469 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4470 == XML_STATUS_ERROR) 4471 xml_failure(g_parser); 4472 CharData_CheckXMLChars(&storage, expected); 4473 } 4474 END_TEST 4475 4476 /* Test element name that is long enough to fill the conversion buffer 4477 * in an unknown encoding, finishing with an simple character. 4478 */ 4479 START_TEST(test_unknown_encoding_long_name_2) { 4480 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4481 "<abcdefghabcdefghabcdefghijklmnop>" 4482 "Hi" 4483 "</abcdefghabcdefghabcdefghijklmnop>"; 4484 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4485 CharData storage; 4486 4487 CharData_Init(&storage); 4488 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4489 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4490 XML_SetUserData(g_parser, &storage); 4491 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4492 == XML_STATUS_ERROR) 4493 xml_failure(g_parser); 4494 CharData_CheckXMLChars(&storage, expected); 4495 } 4496 END_TEST 4497 4498 START_TEST(test_invalid_unknown_encoding) { 4499 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n" 4500 "<doc>Hello world</doc>"; 4501 4502 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4503 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4504 "Invalid unknown 
encoding not faulted"); 4505 } 4506 END_TEST 4507 4508 START_TEST(test_unknown_ascii_encoding_ok) { 4509 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4510 "<doc>Hello, world</doc>"; 4511 4512 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4513 run_character_check(text, XCS("Hello, world")); 4514 } 4515 END_TEST 4516 4517 START_TEST(test_unknown_ascii_encoding_fail) { 4518 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4519 "<doc>Hello, \x80 world</doc>"; 4520 4521 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4522 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4523 "Invalid character not faulted"); 4524 } 4525 END_TEST 4526 4527 START_TEST(test_unknown_encoding_invalid_length) { 4528 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n" 4529 "<doc>Hello, world</doc>"; 4530 4531 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4532 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4533 "Invalid unknown encoding not faulted"); 4534 } 4535 END_TEST 4536 4537 START_TEST(test_unknown_encoding_invalid_topbit) { 4538 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n" 4539 "<doc>Hello, world</doc>"; 4540 4541 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4542 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4543 "Invalid unknown encoding not faulted"); 4544 } 4545 END_TEST 4546 4547 START_TEST(test_unknown_encoding_invalid_surrogate) { 4548 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n" 4549 "<doc>Hello, \x82 world</doc>"; 4550 4551 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4552 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4553 "Invalid unknown encoding not faulted"); 4554 } 4555 END_TEST 4556 4557 START_TEST(test_unknown_encoding_invalid_high) { 4558 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n" 4559 "<doc>Hello, world</doc>"; 4560 4561 
XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4562 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4563 "Invalid unknown encoding not faulted"); 4564 } 4565 END_TEST 4566 4567 START_TEST(test_unknown_encoding_invalid_attr_value) { 4568 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4569 "<doc attr='\xff\x30'/>"; 4570 4571 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4572 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4573 "Invalid attribute valid not faulted"); 4574 } 4575 END_TEST 4576 4577 START_TEST(test_unknown_encoding_user_data_primary) { 4578 // This test is based on ideas contributed by Artiphishell Inc. 4579 const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n" 4580 "<root />\n"; 4581 XML_Parser parser = XML_ParserCreate(NULL); 4582 XML_SetUnknownEncodingHandler(parser, 4583 user_data_checking_unknown_encoding_handler, 4584 (void *)(intptr_t)0xC0FFEE); 4585 4586 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 4587 == XML_STATUS_OK); 4588 4589 XML_ParserFree(parser); 4590 } 4591 END_TEST 4592 4593 START_TEST(test_unknown_encoding_user_data_secondary) { 4594 // This test is based on ideas contributed by Artiphishell Inc. 
4595 const char *const text_main = "<!DOCTYPE r [\n" 4596 " <!ENTITY ext SYSTEM 'ext.ent'>\n" 4597 "]>\n" 4598 "<r>&ext;</r>\n"; 4599 const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n" 4600 "<e>data</e>"; 4601 ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL}; 4602 XML_Parser parser = XML_ParserCreate(NULL); 4603 XML_SetExternalEntityRefHandler(parser, external_entity_loader2); 4604 XML_SetUnknownEncodingHandler(parser, 4605 user_data_checking_unknown_encoding_handler, 4606 (void *)(intptr_t)0xC0FFEE); 4607 XML_SetUserData(parser, &test_data); 4608 4609 assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main), 4610 XML_TRUE) 4611 == XML_STATUS_OK); 4612 4613 XML_ParserFree(parser); 4614 } 4615 END_TEST 4616 4617 /* Test an external entity parser set to use latin-1 detects UTF-16 4618 * BOMs correctly. 4619 */ 4620 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ 4621 START_TEST(test_ext_entity_latin1_utf16le_bom) { 4622 const char *text = "<!DOCTYPE doc [\n" 4623 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4624 "]>\n" 4625 "<doc>&en;</doc>"; 4626 ExtTest2 test_data 4627 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4628 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4629 * 0x4c = L and 0x20 is a space 4630 */ 4631 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4632 #ifdef XML_UNICODE 4633 const XML_Char *expected = XCS("\x00ff\x00feL "); 4634 #else 4635 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4636 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4637 #endif 4638 CharData storage; 4639 4640 CharData_Init(&storage); 4641 test_data.storage = &storage; 4642 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4643 XML_SetUserData(g_parser, &test_data); 4644 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4645 if (_XML_Parse_SINGLE_BYTES(g_parser, text, 
(int)strlen(text), XML_TRUE) 4646 == XML_STATUS_ERROR) 4647 xml_failure(g_parser); 4648 CharData_CheckXMLChars(&storage, expected); 4649 } 4650 END_TEST 4651 4652 START_TEST(test_ext_entity_latin1_utf16be_bom) { 4653 const char *text = "<!DOCTYPE doc [\n" 4654 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4655 "]>\n" 4656 "<doc>&en;</doc>"; 4657 ExtTest2 test_data 4658 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4659 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4660 * 0x4c = L and 0x20 is a space 4661 */ 4662 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4663 #ifdef XML_UNICODE 4664 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4665 #else 4666 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4667 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L"); 4668 #endif 4669 CharData storage; 4670 4671 CharData_Init(&storage); 4672 test_data.storage = &storage; 4673 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4674 XML_SetUserData(g_parser, &test_data); 4675 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4676 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4677 == XML_STATUS_ERROR) 4678 xml_failure(g_parser); 4679 CharData_CheckXMLChars(&storage, expected); 4680 } 4681 END_TEST 4682 4683 /* Parsing the full buffer rather than a byte at a time makes a 4684 * difference to the encoding scanning code, so repeat the above tests 4685 * without breaking them down by byte. 
4686 */ 4687 START_TEST(test_ext_entity_latin1_utf16le_bom2) { 4688 const char *text = "<!DOCTYPE doc [\n" 4689 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4690 "]>\n" 4691 "<doc>&en;</doc>"; 4692 ExtTest2 test_data 4693 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4694 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4695 * 0x4c = L and 0x20 is a space 4696 */ 4697 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4698 #ifdef XML_UNICODE 4699 const XML_Char *expected = XCS("\x00ff\x00feL "); 4700 #else 4701 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4702 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4703 #endif 4704 CharData storage; 4705 4706 CharData_Init(&storage); 4707 test_data.storage = &storage; 4708 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4709 XML_SetUserData(g_parser, &test_data); 4710 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4711 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4712 == XML_STATUS_ERROR) 4713 xml_failure(g_parser); 4714 CharData_CheckXMLChars(&storage, expected); 4715 } 4716 END_TEST 4717 4718 START_TEST(test_ext_entity_latin1_utf16be_bom2) { 4719 const char *text = "<!DOCTYPE doc [\n" 4720 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4721 "]>\n" 4722 "<doc>&en;</doc>"; 4723 ExtTest2 test_data 4724 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4725 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4726 * 0x4c = L and 0x20 is a space 4727 */ 4728 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4729 #ifdef XML_UNICODE 4730 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4731 #else 4732 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4733 const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; 4734 #endif 4735 CharData storage; 4736 4737 CharData_Init(&storage); 4738 test_data.storage = &storage; 4739 
XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4740 XML_SetUserData(g_parser, &test_data); 4741 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4742 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4743 == XML_STATUS_ERROR) 4744 xml_failure(g_parser); 4745 CharData_CheckXMLChars(&storage, expected); 4746 } 4747 END_TEST 4748 4749 /* Test little-endian UTF-16 given an explicit big-endian encoding */ 4750 START_TEST(test_ext_entity_utf16_be) { 4751 const char *text = "<!DOCTYPE doc [\n" 4752 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4753 "]>\n" 4754 "<doc>&en;</doc>"; 4755 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL}; 4756 #ifdef XML_UNICODE 4757 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4758 #else 4759 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4760 "\xe6\x94\x80" /* U+6500 */ 4761 "\xe2\xbc\x80" /* U+2F00 */ 4762 "\xe3\xb8\x80"); /* U+3E00 */ 4763 #endif 4764 CharData storage; 4765 4766 CharData_Init(&storage); 4767 test_data.storage = &storage; 4768 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4769 XML_SetUserData(g_parser, &test_data); 4770 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4771 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4772 == XML_STATUS_ERROR) 4773 xml_failure(g_parser); 4774 CharData_CheckXMLChars(&storage, expected); 4775 } 4776 END_TEST 4777 4778 /* Test big-endian UTF-16 given an explicit little-endian encoding */ 4779 START_TEST(test_ext_entity_utf16_le) { 4780 const char *text = "<!DOCTYPE doc [\n" 4781 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4782 "]>\n" 4783 "<doc>&en;</doc>"; 4784 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL}; 4785 #ifdef XML_UNICODE 4786 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4787 #else 4788 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4789 
"\xe6\x94\x80" /* U+6500 */ 4790 "\xe2\xbc\x80" /* U+2F00 */ 4791 "\xe3\xb8\x80"); /* U+3E00 */ 4792 #endif 4793 CharData storage; 4794 4795 CharData_Init(&storage); 4796 test_data.storage = &storage; 4797 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4798 XML_SetUserData(g_parser, &test_data); 4799 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4801 == XML_STATUS_ERROR) 4802 xml_failure(g_parser); 4803 CharData_CheckXMLChars(&storage, expected); 4804 } 4805 END_TEST 4806 4807 /* Test little-endian UTF-16 given no explicit encoding. 4808 * The existing default encoding (UTF-8) is assumed to hold without a 4809 * BOM to contradict it, so the entity value will in fact provoke an 4810 * error because 0x00 is not a valid XML character. We parse the 4811 * whole buffer in one go rather than feeding it in byte by byte to 4812 * exercise different code paths in the initial scanning routines. 
4813 */ 4814 START_TEST(test_ext_entity_utf16_unknown) { 4815 const char *text = "<!DOCTYPE doc [\n" 4816 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4817 "]>\n" 4818 "<doc>&en;</doc>"; 4819 ExtFaults2 test_data 4820 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL, 4821 XML_ERROR_INVALID_TOKEN}; 4822 4823 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2); 4824 XML_SetUserData(g_parser, &test_data); 4825 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 4826 "Invalid character should not have been accepted"); 4827 } 4828 END_TEST 4829 4830 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */ 4831 START_TEST(test_ext_entity_utf8_non_bom) { 4832 const char *text = "<!DOCTYPE doc [\n" 4833 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4834 "]>\n" 4835 "<doc>&en;</doc>"; 4836 ExtTest2 test_data 4837 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */ 4838 3, NULL, NULL}; 4839 #ifdef XML_UNICODE 4840 const XML_Char *expected = XCS("\xfec0"); 4841 #else 4842 const XML_Char *expected = XCS("\xef\xbb\x80"); 4843 #endif 4844 CharData storage; 4845 4846 CharData_Init(&storage); 4847 test_data.storage = &storage; 4848 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4849 XML_SetUserData(g_parser, &test_data); 4850 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4851 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4852 == XML_STATUS_ERROR) 4853 xml_failure(g_parser); 4854 CharData_CheckXMLChars(&storage, expected); 4855 } 4856 END_TEST 4857 4858 /* Test that UTF-8 in a CDATA section is correctly passed through */ 4859 START_TEST(test_utf8_in_cdata_section) { 4860 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>"; 4861 #ifdef XML_UNICODE 4862 const XML_Char *expected = XCS("one \x00e9 two"); 4863 #else 4864 const XML_Char *expected = XCS("one \xc3\xa9 two"); 4865 #endif 4866 4867 run_character_check(text, expected); 4868 } 4869 
END_TEST 4870 4871 /* Test that little-endian UTF-16 in a CDATA section is handled */ 4872 START_TEST(test_utf8_in_cdata_section_2) { 4873 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>"; 4874 #ifdef XML_UNICODE 4875 const XML_Char *expected = XCS("\x00e9]\x00e9two"); 4876 #else 4877 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two"); 4878 #endif 4879 4880 run_character_check(text, expected); 4881 } 4882 END_TEST 4883 4884 START_TEST(test_utf8_in_start_tags) { 4885 struct test_case { 4886 bool goodName; 4887 bool goodNameStart; 4888 const char *tagName; 4889 }; 4890 4891 // The idea with the tests below is this: 4892 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences 4893 // go to isNever and are hence not a concern. 4894 // 4895 // We start with a character that is a valid name character 4896 // (or even name-start character, see XML 1.0r4 spec) and then we flip 4897 // single bits at places where (1) the result leaves the UTF-8 encoding space 4898 // and (2) we stay in the same n-byte sequence family. 4899 // 4900 // The flipped bits are highlighted in angle brackets in comments, 4901 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped 4902 // the most significant bit to 1 to leave UTF-8 encoding space. 
4903 struct test_case cases[] = { 4904 // 1-byte UTF-8: [0xxx xxxx] 4905 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' 4906 {false, false, "\xBA"}, // [<1>011 1010] 4907 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' 4908 {false, false, "\xB9"}, // [<1>011 1001] 4909 4910 // 2-byte UTF-8: [110x xxxx] [10xx xxxx] 4911 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = 4912 // Arabic small waw U+06E5 4913 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] 4914 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] 4915 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] 4916 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = 4917 // combining char U+0301 4918 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] 4919 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] 4920 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] 4921 4922 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] 4923 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = 4924 // Devanagari Letter A U+0905 4925 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] 4926 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] 4927 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] 4928 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] 4929 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] 4930 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = 4931 // combining char U+0901 4932 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] 4933 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] 4934 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] 4935 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] 4936 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] 4937 }; 4938 const bool atNameStart[] = {true, 
false}; 4939 4940 size_t i = 0; 4941 char doc[1024]; 4942 size_t failCount = 0; 4943 4944 // we need all the bytes to be parsed, but we don't want the errors that can 4945 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. 4946 if (g_reparseDeferralEnabledDefault) { 4947 return; 4948 } 4949 4950 for (; i < sizeof(cases) / sizeof(cases[0]); i++) { 4951 size_t j = 0; 4952 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { 4953 const bool expectedSuccess 4954 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; 4955 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a", 4956 cases[i].tagName); 4957 XML_Parser parser = XML_ParserCreate(NULL); 4958 4959 const enum XML_Status status = _XML_Parse_SINGLE_BYTES( 4960 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); 4961 4962 bool success = true; 4963 if ((status == XML_STATUS_OK) != expectedSuccess) { 4964 success = false; 4965 } 4966 if ((status == XML_STATUS_ERROR) 4967 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { 4968 success = false; 4969 } 4970 4971 if (! success) { 4972 fprintf( 4973 stderr, 4974 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", 4975 (unsigned)i + 1u, atNameStart[j] ? 
" " : "not ", 4976 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); 4977 failCount++; 4978 } 4979 4980 XML_ParserFree(parser); 4981 } 4982 } 4983 4984 if (failCount > 0) { 4985 fail("UTF-8 regression detected"); 4986 } 4987 } 4988 END_TEST 4989 4990 /* Test trailing spaces in elements are accepted */ 4991 START_TEST(test_trailing_spaces_in_elements) { 4992 const char *text = "<doc >Hi</doc >"; 4993 const XML_Char *expected = XCS("doc/doc"); 4994 CharData storage; 4995 4996 CharData_Init(&storage); 4997 XML_SetElementHandler(g_parser, record_element_start_handler, 4998 record_element_end_handler); 4999 XML_SetUserData(g_parser, &storage); 5000 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5001 == XML_STATUS_ERROR) 5002 xml_failure(g_parser); 5003 CharData_CheckXMLChars(&storage, expected); 5004 } 5005 END_TEST 5006 5007 START_TEST(test_utf16_attribute) { 5008 const char text[] = 5009 /* <d {KHO KHWAI}{CHO CHAN}='a'/> 5010 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5011 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5012 */ 5013 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0"; 5014 const XML_Char *expected = XCS("a"); 5015 CharData storage; 5016 5017 CharData_Init(&storage); 5018 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5019 XML_SetUserData(g_parser, &storage); 5020 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5021 == XML_STATUS_ERROR) 5022 xml_failure(g_parser); 5023 CharData_CheckXMLChars(&storage, expected); 5024 } 5025 END_TEST 5026 5027 START_TEST(test_utf16_second_attr) { 5028 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/> 5029 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5030 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5031 */ 5032 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0" 5033 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0"; 5034 const XML_Char *expected = XCS("1"); 5035 CharData storage; 5036 5037 CharData_Init(&storage); 5038 
XML_SetStartElementHandler(g_parser, accumulate_attribute); 5039 XML_SetUserData(g_parser, &storage); 5040 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5041 == XML_STATUS_ERROR) 5042 xml_failure(g_parser); 5043 CharData_CheckXMLChars(&storage, expected); 5044 } 5045 END_TEST 5046 5047 START_TEST(test_attr_after_solidus) { 5048 const char *text = "<doc attr1='a' / attr2='b'>"; 5049 5050 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted"); 5051 } 5052 END_TEST 5053 5054 START_TEST(test_utf16_pe) { 5055 /* <!DOCTYPE doc [ 5056 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'> 5057 * %{KHO KHWAI}{CHO CHAN}; 5058 * ]> 5059 * <doc></doc> 5060 * 5061 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5062 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5063 */ 5064 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 5065 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 " 5066 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 " 5067 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n" 5068 "\0%\x0e\x04\x0e\x08\0;\0\n" 5069 "\0]\0>\0\n" 5070 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>"; 5071 #ifdef XML_UNICODE 5072 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n"); 5073 #else 5074 const XML_Char *expected 5075 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n"); 5076 #endif 5077 CharData storage; 5078 5079 CharData_Init(&storage); 5080 XML_SetUserData(g_parser, &storage); 5081 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl); 5082 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5083 == XML_STATUS_ERROR) 5084 xml_failure(g_parser); 5085 CharData_CheckXMLChars(&storage, expected); 5086 } 5087 END_TEST 5088 5089 /* Test that duff attribute description keywords are rejected */ 5090 START_TEST(test_bad_attr_desc_keyword) { 5091 const char *text = "<!DOCTYPE doc [\n" 5092 " <!ATTLIST doc attr CDATA #!IMPLIED>\n" 5093 "]>\n" 5094 "<doc 
/>"; 5095 5096 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5097 "Bad keyword !IMPLIED not faulted"); 5098 } 5099 END_TEST 5100 5101 /* Test that an invalid attribute description keyword consisting of 5102 * UTF-16 characters with their top bytes non-zero are correctly 5103 * faulted 5104 */ 5105 START_TEST(test_bad_attr_desc_keyword_utf16) { 5106 /* <!DOCTYPE d [ 5107 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}> 5108 * ]><d/> 5109 * 5110 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 5111 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 5112 */ 5113 const char text[] 5114 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 5115 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 " 5116 "\0#\x0e\x04\x0e\x08\0>\0\n" 5117 "\0]\0>\0<\0d\0/\0>"; 5118 5119 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5120 != XML_STATUS_ERROR) 5121 fail("Invalid UTF16 attribute keyword not faulted"); 5122 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 5123 xml_failure(g_parser); 5124 } 5125 END_TEST 5126 5127 /* Test that invalid syntax in a <!DOCTYPE> is rejected. 
Do this 5128 * using prefix-encoding (see above) to trigger specific code paths 5129 */ 5130 START_TEST(test_bad_doctype) { 5131 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 5132 "<!DOCTYPE doc [ \x80\x44 ]><doc/>"; 5133 5134 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 5135 expect_failure(text, XML_ERROR_SYNTAX, 5136 "Invalid bytes in DOCTYPE not faulted"); 5137 } 5138 END_TEST 5139 5140 START_TEST(test_bad_doctype_utf8) { 5141 const char *text = "<!DOCTYPE \xDB\x25" 5142 "doc><doc/>"; // [1101 1011] [<0>010 0101] 5143 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5144 "Invalid UTF-8 in DOCTYPE not faulted"); 5145 } 5146 END_TEST 5147 5148 START_TEST(test_bad_doctype_utf16) { 5149 const char text[] = 5150 /* <!DOCTYPE doc [ \x06f2 ]><doc/> 5151 * 5152 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number 5153 * (name character) but not a valid letter (name start character) 5154 */ 5155 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 " 5156 "\x06\xf2" 5157 "\0 \0]\0>\0<\0d\0o\0c\0/\0>"; 5158 5159 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5160 != XML_STATUS_ERROR) 5161 fail("Invalid bytes in DOCTYPE not faulted"); 5162 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 5163 xml_failure(g_parser); 5164 } 5165 END_TEST 5166 5167 START_TEST(test_bad_doctype_plus) { 5168 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n" 5169 "<1+>&foo;</1+>"; 5170 5171 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5172 "'+' in document name not faulted"); 5173 } 5174 END_TEST 5175 5176 START_TEST(test_bad_doctype_star) { 5177 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n" 5178 "<1*>&foo;</1*>"; 5179 5180 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5181 "'*' in document name not faulted"); 5182 } 5183 END_TEST 5184 5185 START_TEST(test_bad_doctype_query) { 5186 const char *text = "<!DOCTYPE 1? 
[ <!ENTITY foo 'bar'> ]>\n" 5187 "<1?>&foo;</1?>"; 5188 5189 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5190 "'?' in document name not faulted"); 5191 } 5192 END_TEST 5193 5194 START_TEST(test_unknown_encoding_bad_ignore) { 5195 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>" 5196 "<!DOCTYPE doc SYSTEM 'foo'>" 5197 "<doc><e>&entity;</e></doc>"; 5198 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>", 5199 "Invalid character not faulted", XCS("prefix-conv"), 5200 XML_ERROR_INVALID_TOKEN}; 5201 5202 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 5203 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5204 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 5205 XML_SetUserData(g_parser, &fault); 5206 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 5207 "Bad IGNORE section with unknown encoding not failed"); 5208 } 5209 END_TEST 5210 5211 START_TEST(test_entity_in_utf16_be_attr) { 5212 const char text[] = 5213 /* <e a='ä ä'></e> */ 5214 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 " 5215 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>"; 5216 #ifdef XML_UNICODE 5217 const XML_Char *expected = XCS("\x00e4 \x00e4"); 5218 #else 5219 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5220 #endif 5221 CharData storage; 5222 5223 CharData_Init(&storage); 5224 XML_SetUserData(g_parser, &storage); 5225 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5226 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5227 == XML_STATUS_ERROR) 5228 xml_failure(g_parser); 5229 CharData_CheckXMLChars(&storage, expected); 5230 } 5231 END_TEST 5232 5233 START_TEST(test_entity_in_utf16_le_attr) { 5234 const char text[] = 5235 /* <e a='ä ä'></e> */ 5236 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0" 5237 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0"; 5238 #ifdef XML_UNICODE 5239 const XML_Char *expected = XCS("\x00e4 \x00e4"); 5240 #else 
5241 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5242 #endif 5243 CharData storage; 5244 5245 CharData_Init(&storage); 5246 XML_SetUserData(g_parser, &storage); 5247 XML_SetStartElementHandler(g_parser, accumulate_attribute); 5248 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5249 == XML_STATUS_ERROR) 5250 xml_failure(g_parser); 5251 CharData_CheckXMLChars(&storage, expected); 5252 } 5253 END_TEST 5254 5255 START_TEST(test_entity_public_utf16_be) { 5256 const char text[] = 5257 /* <!DOCTYPE d [ */ 5258 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 5259 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5260 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 " 5261 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n" 5262 /* %e; */ 5263 "\0%\0e\0;\0\n" 5264 /* ]> */ 5265 "\0]\0>\0\n" 5266 /* <d>&j;</d> */ 5267 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>"; 5268 ExtTest2 test_data 5269 = {/* <!ENTITY j 'baz'> */ 5270 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL}; 5271 const XML_Char *expected = XCS("baz"); 5272 CharData storage; 5273 5274 CharData_Init(&storage); 5275 test_data.storage = &storage; 5276 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5277 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5278 XML_SetUserData(g_parser, &test_data); 5279 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5280 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5281 == XML_STATUS_ERROR) 5282 xml_failure(g_parser); 5283 CharData_CheckXMLChars(&storage, expected); 5284 } 5285 END_TEST 5286 5287 START_TEST(test_entity_public_utf16_le) { 5288 const char text[] = 5289 /* <!DOCTYPE d [ */ 5290 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0" 5291 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5292 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0" 5293 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0" 5294 /* %e; */ 5295 
"%\0e\0;\0\n\0" 5296 /* ]> */ 5297 "]\0>\0\n\0" 5298 /* <d>&j;</d> */ 5299 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0"; 5300 ExtTest2 test_data 5301 = {/* <!ENTITY j 'baz'> */ 5302 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL}; 5303 const XML_Char *expected = XCS("baz"); 5304 CharData storage; 5305 5306 CharData_Init(&storage); 5307 test_data.storage = &storage; 5308 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5309 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5310 XML_SetUserData(g_parser, &test_data); 5311 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5312 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5313 == XML_STATUS_ERROR) 5314 xml_failure(g_parser); 5315 CharData_CheckXMLChars(&storage, expected); 5316 } 5317 END_TEST 5318 5319 /* Test that a doctype with neither an internal nor external subset is 5320 * faulted 5321 */ 5322 START_TEST(test_short_doctype) { 5323 const char *text = "<!DOCTYPE doc></doc>"; 5324 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5325 "DOCTYPE without subset not rejected"); 5326 } 5327 END_TEST 5328 5329 START_TEST(test_short_doctype_2) { 5330 const char *text = "<!DOCTYPE doc PUBLIC></doc>"; 5331 expect_failure(text, XML_ERROR_SYNTAX, 5332 "DOCTYPE without Public ID not rejected"); 5333 } 5334 END_TEST 5335 5336 START_TEST(test_short_doctype_3) { 5337 const char *text = "<!DOCTYPE doc SYSTEM></doc>"; 5338 expect_failure(text, XML_ERROR_SYNTAX, 5339 "DOCTYPE without System ID not rejected"); 5340 } 5341 END_TEST 5342 5343 START_TEST(test_long_doctype) { 5344 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>"; 5345 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected"); 5346 } 5347 END_TEST 5348 5349 START_TEST(test_bad_entity) { 5350 const char *text = "<!DOCTYPE doc [\n" 5351 " <!ENTITY foo PUBLIC>\n" 5352 "]>\n" 5353 "<doc/>"; 5354 expect_failure(text, XML_ERROR_SYNTAX, 5355 "ENTITY 
without Public ID is not rejected"); 5356 } 5357 END_TEST 5358 5359 /* Test unquoted value is faulted */ 5360 START_TEST(test_bad_entity_2) { 5361 const char *text = "<!DOCTYPE doc [\n" 5362 " <!ENTITY % foo bar>\n" 5363 "]>\n" 5364 "<doc/>"; 5365 expect_failure(text, XML_ERROR_SYNTAX, 5366 "ENTITY without Public ID is not rejected"); 5367 } 5368 END_TEST 5369 5370 START_TEST(test_bad_entity_3) { 5371 const char *text = "<!DOCTYPE doc [\n" 5372 " <!ENTITY % foo PUBLIC>\n" 5373 "]>\n" 5374 "<doc/>"; 5375 expect_failure(text, XML_ERROR_SYNTAX, 5376 "Parameter ENTITY without Public ID is not rejected"); 5377 } 5378 END_TEST 5379 5380 START_TEST(test_bad_entity_4) { 5381 const char *text = "<!DOCTYPE doc [\n" 5382 " <!ENTITY % foo SYSTEM>\n" 5383 "]>\n" 5384 "<doc/>"; 5385 expect_failure(text, XML_ERROR_SYNTAX, 5386 "Parameter ENTITY without Public ID is not rejected"); 5387 } 5388 END_TEST 5389 5390 START_TEST(test_bad_notation) { 5391 const char *text = "<!DOCTYPE doc [\n" 5392 " <!NOTATION n SYSTEM>\n" 5393 "]>\n" 5394 "<doc/>"; 5395 expect_failure(text, XML_ERROR_SYNTAX, 5396 "Notation without System ID is not rejected"); 5397 } 5398 END_TEST 5399 5400 /* Test for issue #11, wrongly suppressed default handler */ 5401 START_TEST(test_default_doctype_handler) { 5402 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n" 5403 " <!ENTITY foo 'bar'>\n" 5404 "]>\n" 5405 "<doc>&foo;</doc>"; 5406 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE}, 5407 {XCS("'test.dtd'"), 10, XML_FALSE}, 5408 {NULL, 0, XML_FALSE}}; 5409 int i; 5410 5411 XML_SetUserData(g_parser, &test_data); 5412 XML_SetDefaultHandler(g_parser, checking_default_handler); 5413 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 5414 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5415 == XML_STATUS_ERROR) 5416 xml_failure(g_parser); 5417 for (i = 0; test_data[i].expected != NULL; i++) 5418 if (! 
test_data[i].seen) 5419 fail("Default handler not run for public !DOCTYPE"); 5420 } 5421 END_TEST 5422 5423 START_TEST(test_empty_element_abort) { 5424 const char *text = "<abort/>"; 5425 5426 XML_SetStartElementHandler(g_parser, start_element_suspender); 5427 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5428 != XML_STATUS_ERROR) 5429 fail("Expected to error on abort"); 5430 } 5431 END_TEST 5432 5433 /* Regression test for GH issue #612: unfinished m_declAttributeType 5434 * allocation in ->m_tempPool can corrupt following allocation. 5435 */ 5436 START_TEST(test_pool_integrity_with_unfinished_attr) { 5437 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n" 5438 "<!DOCTYPE foo [\n" 5439 "<!ELEMENT foo ANY>\n" 5440 "<!ENTITY % entp SYSTEM \"external.dtd\">\n" 5441 "%entp;\n" 5442 "]>\n" 5443 "<a></a>\n"; 5444 const XML_Char *expected = XCS("COMMENT"); 5445 CharData storage; 5446 5447 CharData_Init(&storage); 5448 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5449 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist); 5450 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 5451 XML_SetCommentHandler(g_parser, accumulate_comment); 5452 XML_SetUserData(g_parser, &storage); 5453 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5454 == XML_STATUS_ERROR) 5455 xml_failure(g_parser); 5456 CharData_CheckXMLChars(&storage, expected); 5457 } 5458 END_TEST 5459 5460 /* Test a possible early return location in internalEntityProcessor */ 5461 START_TEST(test_entity_ref_no_elements) { 5462 const char *const text = "<!DOCTYPE foo [\n" 5463 "<!ENTITY e1 \"test\">\n" 5464 "]> <foo>&e1;"; // intentionally missing newline 5465 5466 XML_Parser parser = XML_ParserCreate(NULL); 5467 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5468 == XML_STATUS_ERROR); 5469 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS); 5470 
XML_ParserFree(parser); 5471 } 5472 END_TEST 5473 5474 /* Tests if chained entity references lead to unbounded recursion */ 5475 START_TEST(test_deep_nested_entity) { 5476 const size_t N_LINES = 60000; 5477 const size_t SIZE_PER_LINE = 50; 5478 5479 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); 5480 if (text == NULL) { 5481 fail("malloc failed"); 5482 } 5483 5484 char *textPtr = text; 5485 5486 // Create the XML 5487 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5488 "<!DOCTYPE foo [\n" 5489 " <!ENTITY s0 'deepText'>\n"); 5490 5491 for (size_t i = 1; i < N_LINES; ++i) { 5492 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n", 5493 (long unsigned)i, (long unsigned)(i - 1)); 5494 } 5495 5496 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n", 5497 (long unsigned)(N_LINES - 1)); 5498 5499 const XML_Char *const expected = XCS("deepText"); 5500 5501 CharData storage; 5502 CharData_Init(&storage); 5503 5504 XML_Parser parser = XML_ParserCreate(NULL); 5505 5506 XML_SetCharacterDataHandler(parser, accumulate_characters); 5507 XML_SetUserData(parser, &storage); 5508 5509 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5510 == XML_STATUS_ERROR) 5511 xml_failure(parser); 5512 5513 CharData_CheckXMLChars(&storage, expected); 5514 XML_ParserFree(parser); 5515 free(text); 5516 } 5517 END_TEST 5518 5519 /* Tests if chained entity references in attributes 5520 lead to unbounded recursion */ 5521 START_TEST(test_deep_nested_attribute_entity) { 5522 const size_t N_LINES = 60000; 5523 const size_t SIZE_PER_LINE = 100; 5524 5525 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); 5526 if (text == NULL) { 5527 fail("malloc failed"); 5528 } 5529 5530 char *textPtr = text; 5531 5532 // Create the XML 5533 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5534 "<!DOCTYPE foo [\n" 5535 " <!ENTITY s0 'deepText'>\n"); 5536 5537 for (size_t i = 1; i < N_LINES; ++i) { 5538 textPtr += snprintf(textPtr, SIZE_PER_LINE, 
" <!ENTITY s%lu '&s%lu;'>\n", 5539 (long unsigned)i, (long unsigned)(i - 1)); 5540 } 5541 5542 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n", 5543 (long unsigned)(N_LINES - 1)); 5544 5545 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}}; 5546 ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}}; 5547 info[0].attributes = doc_info; 5548 5549 XML_Parser parser = XML_ParserCreate(NULL); 5550 ParserAndElementInfo parserPlusElemenInfo = {parser, info}; 5551 5552 XML_SetStartElementHandler(parser, counting_start_element_handler); 5553 XML_SetUserData(parser, &parserPlusElemenInfo); 5554 5555 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5556 == XML_STATUS_ERROR) 5557 xml_failure(parser); 5558 5559 XML_ParserFree(parser); 5560 free(text); 5561 } 5562 END_TEST 5563 5564 START_TEST(test_deep_nested_entity_delayed_interpretation) { 5565 const size_t N_LINES = 70000; 5566 const size_t SIZE_PER_LINE = 100; 5567 5568 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); 5569 if (text == NULL) { 5570 fail("malloc failed"); 5571 } 5572 5573 char *textPtr = text; 5574 5575 // Create the XML 5576 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5577 "<!DOCTYPE foo [\n" 5578 " <!ENTITY %% s0 'deepText'>\n"); 5579 5580 for (size_t i = 1; i < N_LINES; ++i) { 5581 textPtr += snprintf(textPtr, SIZE_PER_LINE, 5582 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i, 5583 (long unsigned)(i - 1)); 5584 } 5585 5586 snprintf(textPtr, SIZE_PER_LINE, 5587 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n" 5588 " %%define_g;\n" 5589 "]>\n" 5590 "<foo/>\n", 5591 (long unsigned)(N_LINES - 1)); 5592 5593 XML_Parser parser = XML_ParserCreate(NULL); 5594 5595 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5596 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 5597 == XML_STATUS_ERROR) 5598 xml_failure(parser); 5599 5600 XML_ParserFree(parser); 5601 free(text); 
5602 } 5603 END_TEST 5604 5605 START_TEST(test_nested_entity_suspend) { 5606 const char *const text = "<!DOCTYPE a [\n" 5607 " <!ENTITY e1 '<!--e1-->'>\n" 5608 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n" 5609 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n" 5610 "]>\n" 5611 "<a><!--start-->&e3;<!--end--></a>"; 5612 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head") 5613 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end"); 5614 CharData storage; 5615 CharData_Init(&storage); 5616 XML_Parser parser = XML_ParserCreate(NULL); 5617 ParserPlusStorage parserPlusStorage = {parser, &storage}; 5618 5619 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5620 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler); 5621 XML_SetUserData(parser, &parserPlusStorage); 5622 5623 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5624 while (status == XML_STATUS_SUSPENDED) { 5625 status = XML_ResumeParser(parser); 5626 } 5627 if (status != XML_STATUS_OK) 5628 xml_failure(parser); 5629 5630 CharData_CheckXMLChars(&storage, expected); 5631 XML_ParserFree(parser); 5632 } 5633 END_TEST 5634 5635 START_TEST(test_nested_entity_suspend_2) { 5636 const char *const text = "<!DOCTYPE doc [\n" 5637 " <!ENTITY ge1 'head1Ztail1'>\n" 5638 " <!ENTITY ge2 'head2&ge1;tail2'>\n" 5639 " <!ENTITY ge3 'head3&ge2;tail3'>\n" 5640 "]>\n" 5641 "<doc>&ge3;</doc>"; 5642 const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1") 5643 XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3"); 5644 CharData storage; 5645 CharData_Init(&storage); 5646 XML_Parser parser = XML_ParserCreate(NULL); 5647 ParserPlusStorage parserPlusStorage = {parser, &storage}; 5648 5649 XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend); 5650 XML_SetUserData(parser, &parserPlusStorage); 5651 5652 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5653 while (status == XML_STATUS_SUSPENDED) 
{ 5654 status = XML_ResumeParser(parser); 5655 } 5656 if (status != XML_STATUS_OK) 5657 xml_failure(parser); 5658 5659 CharData_CheckXMLChars(&storage, expected); 5660 XML_ParserFree(parser); 5661 } 5662 END_TEST 5663 5664 /* Regression test for quadratic parsing on large tokens */ 5665 START_TEST(test_big_tokens_scale_linearly) { 5666 const struct { 5667 const char *pre; 5668 const char *post; 5669 } text[] = { 5670 {"<a>", "</a>"}, // assumed good, used as baseline 5671 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch 5672 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²) 5673 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²) 5674 {"<e><", "/></e>"}, // big elem name, used to be O(N²) 5675 }; 5676 const int num_cases = sizeof(text) / sizeof(text[0]); 5677 char aaaaaa[4096]; 5678 const int fillsize = (int)sizeof(aaaaaa); 5679 const int fillcount = 100; 5680 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post. 5681 const unsigned max_factor = 4; 5682 const unsigned max_scanned = max_factor * approx_bytes; 5683 5684 memset(aaaaaa, 'a', fillsize); 5685 5686 if (! g_reparseDeferralEnabledDefault) { 5687 return; // heuristic is disabled; we would get O(n^2) and fail. 
5688 } 5689 5690 for (int i = 0; i < num_cases; ++i) { 5691 XML_Parser parser = XML_ParserCreate(NULL); 5692 assert_true(parser != NULL); 5693 enum XML_Status status; 5694 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post); 5695 5696 // parse the start text 5697 g_bytesScanned = 0; 5698 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, 5699 (int)strlen(text[i].pre), XML_FALSE); 5700 if (status != XML_STATUS_OK) { 5701 xml_failure(parser); 5702 } 5703 5704 // parse lots of 'a', failing the test early if it takes too long 5705 unsigned past_max_count = 0; 5706 for (int f = 0; f < fillcount; ++f) { 5707 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); 5708 if (status != XML_STATUS_OK) { 5709 xml_failure(parser); 5710 } 5711 if (g_bytesScanned > max_scanned) { 5712 // We're not done, and have already passed the limit -- the test will 5713 // definitely fail. This block allows us to save time by failing early. 5714 const unsigned pushed 5715 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize; 5716 fprintf( 5717 stderr, 5718 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 5719 f + 1, fillcount, pushed, g_bytesScanned, 5720 g_bytesScanned / (double)pushed, max_scanned, max_factor); 5721 past_max_count++; 5722 // We are failing, but allow a few log prints first. If we don't reach 5723 // a count of five, the test will fail after the loop instead. 
5724 assert_true(past_max_count < 5); 5725 } 5726 } 5727 5728 // parse the end text 5729 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, 5730 (int)strlen(text[i].post), XML_TRUE); 5731 if (status != XML_STATUS_OK) { 5732 xml_failure(parser); 5733 } 5734 5735 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working 5736 if (g_bytesScanned > max_scanned) { 5737 fprintf( 5738 stderr, 5739 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 5740 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned, 5741 max_factor); 5742 fail("scanned too many bytes"); 5743 } 5744 5745 XML_ParserFree(parser); 5746 } 5747 } 5748 END_TEST 5749 5750 START_TEST(test_set_reparse_deferral) { 5751 const char *const pre = "<d>"; 5752 const char *const start = "<x attr='"; 5753 const char *const end = "'></x>"; 5754 char eeeeee[100]; 5755 const int fillsize = (int)sizeof(eeeeee); 5756 memset(eeeeee, 'e', fillsize); 5757 5758 for (int enabled = 0; enabled <= 1; enabled += 1) { 5759 set_subtest("deferral=%d", enabled); 5760 5761 XML_Parser parser = XML_ParserCreate(NULL); 5762 assert_true(parser != NULL); 5763 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5764 // pre-grow the buffer to avoid reparsing due to almost-fullness 5765 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5766 5767 CharData storage; 5768 CharData_Init(&storage); 5769 XML_SetUserData(parser, &storage); 5770 XML_SetStartElementHandler(parser, start_element_event_handler); 5771 5772 enum XML_Status status; 5773 // parse the start text 5774 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5775 if (status != XML_STATUS_OK) { 5776 xml_failure(parser); 5777 } 5778 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5779 5780 // ..and the start of the token 5781 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5782 if (status != XML_STATUS_OK) { 5783 xml_failure(parser); 5784 } 5785 
CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one 5786 5787 // try to parse lots of 'e', but the token isn't finished 5788 for (int c = 0; c < 100; ++c) { 5789 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5790 if (status != XML_STATUS_OK) { 5791 xml_failure(parser); 5792 } 5793 } 5794 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 5795 5796 // end the <x> token. 5797 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5798 if (status != XML_STATUS_OK) { 5799 xml_failure(parser); 5800 } 5801 5802 if (enabled) { 5803 // In general, we may need to push more data to trigger a reparse attempt, 5804 // but in this test, the data is constructed to always require it. 5805 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect 5806 // 2x the token length should suffice; the +1 covers the start and end. 5807 for (int c = 0; c < 101; ++c) { 5808 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5809 if (status != XML_STATUS_OK) { 5810 xml_failure(parser); 5811 } 5812 } 5813 } 5814 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done 5815 5816 XML_ParserFree(parser); 5817 } 5818 } 5819 END_TEST 5820 5821 struct element_decl_data { 5822 XML_Parser parser; 5823 int count; 5824 }; 5825 5826 static void 5827 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { 5828 UNUSED_P(name); 5829 struct element_decl_data *testdata = (struct element_decl_data *)userData; 5830 testdata->count += 1; 5831 XML_FreeContentModel(testdata->parser, model); 5832 } 5833 5834 static int 5835 external_inherited_parser(XML_Parser p, const XML_Char *context, 5836 const XML_Char *base, const XML_Char *systemId, 5837 const XML_Char *publicId) { 5838 UNUSED_P(base); 5839 UNUSED_P(systemId); 5840 UNUSED_P(publicId); 5841 const char *const pre = "<!ELEMENT document ANY>\n"; 5842 const char *const start = "<!ELEMENT "; 5843 const char *const end = " ANY>\n"; 5844 const char 
*const post = "<!ELEMENT xyz ANY>\n"; 5845 const int enabled = *(int *)XML_GetUserData(p); 5846 char eeeeee[100]; 5847 char spaces[100]; 5848 const int fillsize = (int)sizeof(eeeeee); 5849 assert_true(fillsize == (int)sizeof(spaces)); 5850 memset(eeeeee, 'e', fillsize); 5851 memset(spaces, ' ', fillsize); 5852 5853 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); 5854 assert_true(parser != NULL); 5855 // pre-grow the buffer to avoid reparsing due to almost-fullness 5856 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5857 5858 struct element_decl_data testdata; 5859 testdata.parser = parser; 5860 testdata.count = 0; 5861 XML_SetUserData(parser, &testdata); 5862 XML_SetElementDeclHandler(parser, element_decl_counter); 5863 5864 enum XML_Status status; 5865 // parse the initial text 5866 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5867 if (status != XML_STATUS_OK) { 5868 xml_failure(parser); 5869 } 5870 assert_true(testdata.count == 1); // first element should be done 5871 5872 // ..and the start of the big token 5873 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5874 if (status != XML_STATUS_OK) { 5875 xml_failure(parser); 5876 } 5877 assert_true(testdata.count == 1); // still just the first one 5878 5879 // try to parse lots of 'e', but the token isn't finished 5880 for (int c = 0; c < 100; ++c) { 5881 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5882 if (status != XML_STATUS_OK) { 5883 xml_failure(parser); 5884 } 5885 } 5886 assert_true(testdata.count == 1); // *still* just the first one 5887 5888 // end the big token. 5889 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5890 if (status != XML_STATUS_OK) { 5891 xml_failure(parser); 5892 } 5893 5894 if (enabled) { 5895 // In general, we may need to push more data to trigger a reparse attempt, 5896 // but in this test, the data is constructed to always require it. 
5897 assert_true(testdata.count == 1); // or the test is incorrect 5898 // 2x the token length should suffice; the +1 covers the start and end. 5899 for (int c = 0; c < 101; ++c) { 5900 status = XML_Parse(parser, spaces, fillsize, XML_FALSE); 5901 if (status != XML_STATUS_OK) { 5902 xml_failure(parser); 5903 } 5904 } 5905 } 5906 assert_true(testdata.count == 2); // the big token should be done 5907 5908 // parse the final text 5909 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); 5910 if (status != XML_STATUS_OK) { 5911 xml_failure(parser); 5912 } 5913 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done 5914 5915 XML_ParserFree(parser); 5916 return XML_STATUS_OK; 5917 } 5918 5919 START_TEST(test_reparse_deferral_is_inherited) { 5920 const char *const text 5921 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>"; 5922 for (int enabled = 0; enabled <= 1; ++enabled) { 5923 set_subtest("deferral=%d", enabled); 5924 5925 XML_Parser parser = XML_ParserCreate(NULL); 5926 assert_true(parser != NULL); 5927 XML_SetUserData(parser, (void *)&enabled); 5928 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5929 // this handler creates a sub-parser and checks that its deferral behavior 5930 // is what we expected, based on the value of `enabled` (in userdata). 
5931 XML_SetExternalEntityRefHandler(parser, external_inherited_parser); 5932 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5933 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 5934 xml_failure(parser); 5935 5936 XML_ParserFree(parser); 5937 } 5938 } 5939 END_TEST 5940 5941 START_TEST(test_set_reparse_deferral_on_null_parser) { 5942 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); 5943 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); 5944 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); 5945 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); 5946 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) 5947 == XML_FALSE); 5948 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) 5949 == XML_FALSE); 5950 } 5951 END_TEST 5952 5953 START_TEST(test_set_reparse_deferral_on_the_fly) { 5954 const char *const pre = "<d><x attr='"; 5955 const char *const end = "'></x>"; 5956 char iiiiii[100]; 5957 const int fillsize = (int)sizeof(iiiiii); 5958 memset(iiiiii, 'i', fillsize); 5959 5960 XML_Parser parser = XML_ParserCreate(NULL); 5961 assert_true(parser != NULL); 5962 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); 5963 5964 CharData storage; 5965 CharData_Init(&storage); 5966 XML_SetUserData(parser, &storage); 5967 XML_SetStartElementHandler(parser, start_element_event_handler); 5968 5969 enum XML_Status status; 5970 // parse the start text 5971 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5972 if (status != XML_STATUS_OK) { 5973 xml_failure(parser); 5974 } 5975 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5976 5977 // try to parse some 'i', but the token isn't finished 5978 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); 5979 if (status != XML_STATUS_OK) { 5980 xml_failure(parser); 5981 } 5982 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 5983 
5984 // end the <x> token. 5985 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5986 if (status != XML_STATUS_OK) { 5987 xml_failure(parser); 5988 } 5989 CharData_CheckXMLChars(&storage, XCS("d")); // not yet. 5990 5991 // now change the heuristic setting and add *no* data 5992 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); 5993 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. 5994 status = XML_Parse(parser, "", 0, XML_FALSE); 5995 if (status != XML_STATUS_OK) { 5996 xml_failure(parser); 5997 } 5998 CharData_CheckXMLChars(&storage, XCS("dx")); 5999 6000 XML_ParserFree(parser); 6001 } 6002 END_TEST 6003 6004 START_TEST(test_set_bad_reparse_option) { 6005 XML_Parser parser = XML_ParserCreate(NULL); 6006 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); 6007 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); 6008 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); 6009 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); 6010 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); 6011 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); 6012 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); 6013 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); 6014 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); 6015 XML_ParserFree(parser); 6016 } 6017 END_TEST 6018 6019 static size_t g_totalAlloc = 0; 6020 static size_t g_biggestAlloc = 0; 6021 6022 static void * 6023 counting_realloc(void *ptr, size_t size) { 6024 g_totalAlloc += size; 6025 if (size > g_biggestAlloc) { 6026 g_biggestAlloc = size; 6027 } 6028 return realloc(ptr, size); 6029 } 6030 6031 static void * 6032 counting_malloc(size_t size) { 6033 return counting_realloc(NULL, size); 6034 } 6035 6036 START_TEST(test_bypass_heuristic_when_close_to_bufsize) { 6037 if (g_chunkSize != 0) { 6038 // this test does not 
use SINGLE_BYTES, because it depends on very precise 6039 // buffer fills. 6040 return; 6041 } 6042 if (! g_reparseDeferralEnabledDefault) { 6043 return; // this test is irrelevant when the deferral heuristic is disabled. 6044 } 6045 6046 const int document_length = 65536; 6047 char *const document = (char *)malloc(document_length); 6048 assert_true(document != NULL); 6049 6050 const XML_Memory_Handling_Suite memfuncs = { 6051 counting_malloc, 6052 counting_realloc, 6053 free, 6054 }; 6055 6056 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; 6057 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; 6058 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; 6059 6060 for (const int *leading = leading_list; *leading >= 0; leading++) { 6061 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { 6062 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { 6063 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken, 6064 *fillsize); 6065 // start by checking that the test looks reasonably valid 6066 assert_true(*leading + *bigtoken <= document_length); 6067 6068 // put 'x' everywhere; some will be overwritten by elements. 6069 memset(document, 'x', document_length); 6070 // maybe add an initial tag 6071 if (*leading) { 6072 assert_true(*leading >= 3); // or the test case is invalid 6073 memcpy(document, "<a>", 3); 6074 } 6075 // add the large token 6076 document[*leading + 0] = '<'; 6077 document[*leading + 1] = 'b'; 6078 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token 6079 document[*leading + *bigtoken - 1] = '>'; 6080 6081 // 1 for 'b', plus 1 or 0 depending on the presence of 'a' 6082 const int expected_elem_total = 1 + (*leading ? 
1 : 0); 6083 6084 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); 6085 assert_true(parser != NULL); 6086 6087 CharData storage; 6088 CharData_Init(&storage); 6089 XML_SetUserData(parser, &storage); 6090 XML_SetStartElementHandler(parser, start_element_event_handler); 6091 6092 g_biggestAlloc = 0; 6093 g_totalAlloc = 0; 6094 int offset = 0; 6095 // fill data until the big token is covered (but not necessarily parsed) 6096 while (offset < *leading + *bigtoken) { 6097 assert_true(offset + *fillsize <= document_length); 6098 const enum XML_Status status 6099 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6100 if (status != XML_STATUS_OK) { 6101 xml_failure(parser); 6102 } 6103 offset += *fillsize; 6104 } 6105 // Now, check that we've had a buffer allocation that could fit the 6106 // context bytes and our big token. In order to detect a special case, 6107 // we need to know how many bytes of our big token were included in the 6108 // first push that contained _any_ bytes of the big token: 6109 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); 6110 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { 6111 // Special case: we aren't saving any context, and the whole big token 6112 // was covered by a single fill, so Expat may have parsed directly 6113 // from our input pointer, without allocating an internal buffer. 
6114 } else if (*leading < XML_CONTEXT_BYTES) { 6115 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); 6116 } else { 6117 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); 6118 } 6119 // fill data until the big token is actually parsed 6120 while (storage.count < expected_elem_total) { 6121 const size_t alloc_before = g_totalAlloc; 6122 assert_true(offset + *fillsize <= document_length); 6123 const enum XML_Status status 6124 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6125 if (status != XML_STATUS_OK) { 6126 xml_failure(parser); 6127 } 6128 offset += *fillsize; 6129 // since all the bytes of the big token are already in the buffer, 6130 // the bufsize ceiling should make us finish its parsing without any 6131 // further buffer allocations. We assume that there will be no other 6132 // large allocations in this test. 6133 assert_true(g_totalAlloc - alloc_before < 4096); 6134 } 6135 // test-the-test: was our alloc even called? 6136 assert_true(g_totalAlloc > 0); 6137 // test-the-test: there shouldn't be any extra start elements 6138 assert_true(storage.count == expected_elem_total); 6139 6140 XML_ParserFree(parser); 6141 } 6142 } 6143 } 6144 free(document); 6145 } 6146 END_TEST 6147 6148 START_TEST(test_varying_buffer_fills) { 6149 const int KiB = 1024; 6150 const int MiB = 1024 * KiB; 6151 const int document_length = 16 * MiB; 6152 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB 6153 6154 if (g_chunkSize != 0) { 6155 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES(). 6156 } 6157 6158 char *const document = (char *)malloc(document_length); 6159 assert_true(document != NULL); 6160 memset(document, 'x', document_length); 6161 document[0] = '<'; 6162 document[1] = 't'; 6163 memset(&document[2], ' ', big - 2); // a very spacy token 6164 document[big - 1] = '>'; 6165 6166 // Each testcase is a list of buffer fill sizes, terminated by a value < 0. 
6167 // When reparse deferral is enabled, the final (negated) value is the expected 6168 // maximum number of bytes scanned in parse attempts. 6169 const int testcases[][30] = { 6170 {8 * MiB, -8 * MiB}, 6171 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total 6172 // zero-size fills shouldn't trigger the bypass 6173 {4 * MiB, 0, 4 * MiB, -12 * MiB}, 6174 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, 6175 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, 6176 // try to hit the buffer ceiling only once (at the end) 6177 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, 6178 // try to hit the same buffer ceiling multiple times 6179 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, 6180 6181 // try to hit every ceiling, by always landing 1K shy of the buffer size 6182 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, 6183 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, 6184 6185 // try to avoid every ceiling, by always landing 1B past the buffer size 6186 // the normal 2x heuristic threshold still forces parse attempts. 6187 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 6188 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 6189 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 6190 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 6191 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 6192 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 6193 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 6194 -(10 * MiB + 682 * KiB + 7)}, 6195 // try to avoid every ceiling again, except on our last fill. 
6196 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 6197 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 6198 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 6199 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 6200 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 6201 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 6202 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 6203 -(10 * MiB + 682 * KiB + 6)}, 6204 6205 // try to hit ceilings on the way multiple times 6206 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer 6207 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer 6208 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer 6209 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer 6210 // we'll make a parse attempt at every parse call 6211 -(45 * MiB + 12)}, 6212 }; 6213 const int testcount = sizeof(testcases) / sizeof(testcases[0]); 6214 for (int test_i = 0; test_i < testcount; test_i++) { 6215 const int *fillsize = testcases[test_i]; 6216 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1], 6217 fillsize[2], fillsize[3]); 6218 XML_Parser parser = XML_ParserCreate(NULL); 6219 assert_true(parser != NULL); 6220 6221 CharData storage; 6222 CharData_Init(&storage); 6223 XML_SetUserData(parser, &storage); 6224 XML_SetStartElementHandler(parser, start_element_event_handler); 6225 6226 g_bytesScanned = 0; 6227 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) 6228 int offset = 0; 6229 while (*fillsize >= 0) { 6230 assert_true(offset + *fillsize <= document_length); // or test is invalid 6231 const enum XML_Status status 6232 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 6233 if (status != XML_STATUS_OK) { 6234 xml_failure(parser); 6235 } 6236 offset += *fillsize; 6237 fillsize++; 6238 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid 
overflow 6239 worstcase_bytes += offset; // we might've tried to parse all pending bytes 6240 } 6241 assert_true(storage.count == 1); // the big token should've been parsed 6242 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? 6243 if (g_reparseDeferralEnabledDefault) { 6244 // heuristic is enabled; some XML_Parse calls may have deferred reparsing 6245 const unsigned max_bytes_scanned = -*fillsize; 6246 if (g_bytesScanned > max_bytes_scanned) { 6247 fprintf(stderr, 6248 "bytes scanned in parse attempts: actual=%u limit=%u \n", 6249 g_bytesScanned, max_bytes_scanned); 6250 fail("too many bytes scanned in parse attempts"); 6251 } 6252 } 6253 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); 6254 6255 XML_ParserFree(parser); 6256 } 6257 free(document); 6258 } 6259 END_TEST 6260 6261 START_TEST(test_empty_ext_param_entity_in_value) { 6262 const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>"; 6263 ExtOption options[] = { 6264 {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">" 6265 "<!ENTITY ge \"%pe;\">"}, 6266 {XCS("empty"), ""}, 6267 {NULL, NULL}, 6268 }; 6269 6270 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 6271 XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); 6272 XML_SetUserData(g_parser, options); 6273 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 6274 == XML_STATUS_ERROR) 6275 xml_failure(g_parser); 6276 } 6277 END_TEST 6278 6279 void 6280 make_basic_test_case(Suite *s) { 6281 TCase *tc_basic = tcase_create("basic tests"); 6282 6283 suite_add_tcase(s, tc_basic); 6284 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 6285 6286 tcase_add_test(tc_basic, test_nul_byte); 6287 tcase_add_test(tc_basic, test_u0000_char); 6288 tcase_add_test(tc_basic, test_siphash_self); 6289 tcase_add_test(tc_basic, test_siphash_spec); 6290 tcase_add_test(tc_basic, test_bom_utf8); 6291 tcase_add_test(tc_basic, test_bom_utf16_be); 6292 tcase_add_test(tc_basic, 
test_bom_utf16_le); 6293 tcase_add_test(tc_basic, test_nobom_utf16_le); 6294 tcase_add_test(tc_basic, test_hash_collision); 6295 tcase_add_test(tc_basic, test_illegal_utf8); 6296 tcase_add_test(tc_basic, test_utf8_auto_align); 6297 tcase_add_test(tc_basic, test_utf16); 6298 tcase_add_test(tc_basic, test_utf16_le_epilog_newline); 6299 tcase_add_test(tc_basic, test_not_utf16); 6300 tcase_add_test(tc_basic, test_bad_encoding); 6301 tcase_add_test(tc_basic, test_latin1_umlauts); 6302 tcase_add_test(tc_basic, test_long_utf8_character); 6303 tcase_add_test(tc_basic, test_long_latin1_attribute); 6304 tcase_add_test(tc_basic, test_long_ascii_attribute); 6305 /* Regression test for SF bug #491986. */ 6306 tcase_add_test(tc_basic, test_danish_latin1); 6307 /* Regression test for SF bug #514281. */ 6308 tcase_add_test(tc_basic, test_french_charref_hexidecimal); 6309 tcase_add_test(tc_basic, test_french_charref_decimal); 6310 tcase_add_test(tc_basic, test_french_latin1); 6311 tcase_add_test(tc_basic, test_french_utf8); 6312 tcase_add_test(tc_basic, test_utf8_false_rejection); 6313 tcase_add_test(tc_basic, test_line_number_after_parse); 6314 tcase_add_test(tc_basic, test_column_number_after_parse); 6315 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); 6316 tcase_add_test(tc_basic, test_line_number_after_error); 6317 tcase_add_test(tc_basic, test_column_number_after_error); 6318 tcase_add_test(tc_basic, test_really_long_lines); 6319 tcase_add_test(tc_basic, test_really_long_encoded_lines); 6320 tcase_add_test(tc_basic, test_end_element_events); 6321 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized); 6322 tcase_add_test(tc_basic, test_attr_whitespace_normalization); 6323 tcase_add_test(tc_basic, test_xmldecl_misplaced); 6324 tcase_add_test(tc_basic, test_xmldecl_invalid); 6325 tcase_add_test(tc_basic, test_xmldecl_missing_attr); 6326 tcase_add_test(tc_basic, test_xmldecl_missing_value); 6327 tcase_add_test__if_xml_ge(tc_basic, 
test_unknown_encoding_internal_entity); 6328 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); 6329 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); 6330 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); 6331 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); 6332 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding); 6333 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2); 6334 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset); 6335 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset); 6336 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); 6337 tcase_add_test(tc_basic, 6338 test_wfc_undeclared_entity_with_external_subset_standalone); 6339 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); 6340 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); 6341 tcase_add_test(tc_basic, test_not_standalone_handler_reject); 6342 tcase_add_test(tc_basic, test_not_standalone_handler_accept); 6343 tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one); 6344 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs); 6345 tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs); 6346 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse); 6347 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling); 6348 tcase_add_test(tc_basic, test_dtd_attr_handling); 6349 tcase_add_test(tc_basic, test_empty_ns_without_namespaces); 6350 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); 6351 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); 6352 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); 6353 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); 6354 tcase_add_test(tc_basic, test_good_cdata_ascii); 6355 
tcase_add_test(tc_basic, test_good_cdata_utf16); 6356 tcase_add_test(tc_basic, test_good_cdata_utf16_le); 6357 tcase_add_test(tc_basic, test_long_cdata_utf16); 6358 tcase_add_test(tc_basic, test_multichar_cdata_utf16); 6359 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); 6360 tcase_add_test(tc_basic, test_bad_cdata); 6361 tcase_add_test(tc_basic, test_bad_cdata_utf16); 6362 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); 6363 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); 6364 tcase_add_test(tc_basic, test_memory_allocation); 6365 tcase_add_test__if_xml_ge(tc_basic, test_default_current); 6366 tcase_add_test(tc_basic, test_dtd_elements); 6367 tcase_add_test(tc_basic, test_dtd_elements_nesting); 6368 tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); 6369 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); 6370 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); 6371 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype); 6372 tcase_add_test__ifdef_xml_dtd(tc_basic, 6373 test_foreign_dtd_without_external_subset); 6374 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd); 6375 tcase_add_test(tc_basic, test_set_base); 6376 tcase_add_test(tc_basic, test_attributes); 6377 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity); 6378 tcase_add_test(tc_basic, test_resume_invalid_parse); 6379 tcase_add_test(tc_basic, test_resume_resuspended); 6380 tcase_add_test(tc_basic, test_cdata_default); 6381 tcase_add_test(tc_basic, test_subordinate_reset); 6382 tcase_add_test(tc_basic, test_subordinate_suspend); 6383 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend); 6384 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort); 6385 tcase_add_test__ifdef_xml_dtd(tc_basic, 6386 test_ext_entity_invalid_suspended_parse); 6387 tcase_add_test(tc_basic, test_explicit_encoding); 6388 tcase_add_test(tc_basic, test_trailing_cr); 6389 
tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr); 6390 tcase_add_test(tc_basic, test_trailing_rsqb); 6391 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb); 6392 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata); 6393 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); 6394 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); 6395 tcase_add_test(tc_basic, test_empty_parse); 6396 tcase_add_test(tc_basic, test_negative_len_parse); 6397 tcase_add_test(tc_basic, test_negative_len_parse_buffer); 6398 tcase_add_test(tc_basic, test_get_buffer_1); 6399 tcase_add_test(tc_basic, test_get_buffer_2); 6400 #if XML_CONTEXT_BYTES > 0 6401 tcase_add_test(tc_basic, test_get_buffer_3_overflow); 6402 #endif 6403 tcase_add_test(tc_basic, test_buffer_can_grow_to_max); 6404 tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); 6405 tcase_add_test(tc_basic, test_byte_info_at_end); 6406 tcase_add_test(tc_basic, test_byte_info_at_error); 6407 tcase_add_test(tc_basic, test_byte_info_at_cdata); 6408 tcase_add_test(tc_basic, test_predefined_entities); 6409 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd); 6410 tcase_add_test(tc_basic, test_not_predefined_entities); 6411 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section); 6412 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); 6413 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); 6414 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); 6415 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); 6416 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); 6417 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); 6418 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); 6419 tcase_add_test(tc_basic, test_bad_public_doctype); 6420 tcase_add_test(tc_basic, test_attribute_enum_value); 6421 tcase_add_test(tc_basic, 
test_predefined_entity_redefinition); 6422 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing); 6423 tcase_add_test(tc_basic, test_public_notation_no_sysid); 6424 tcase_add_test(tc_basic, test_nested_groups); 6425 tcase_add_test(tc_basic, test_group_choice); 6426 tcase_add_test(tc_basic, test_standalone_parameter_entity); 6427 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); 6428 tcase_add_test__ifdef_xml_dtd(tc_basic, 6429 test_recursive_external_parameter_entity); 6430 tcase_add_test__ifdef_xml_dtd(tc_basic, 6431 test_recursive_external_parameter_entity_2); 6432 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); 6433 tcase_add_test(tc_basic, test_suspend_xdecl); 6434 tcase_add_test(tc_basic, test_abort_epilog); 6435 tcase_add_test(tc_basic, test_abort_epilog_2); 6436 tcase_add_test(tc_basic, test_suspend_epilog); 6437 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag); 6438 tcase_add_test(tc_basic, test_unfinished_epilog); 6439 tcase_add_test(tc_basic, test_partial_char_in_epilog); 6440 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity); 6441 tcase_add_test__ifdef_xml_dtd(tc_basic, 6442 test_suspend_resume_internal_entity_issue_629); 6443 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error); 6444 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity); 6445 tcase_add_test(tc_basic, test_restart_on_error); 6446 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); 6447 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); 6448 tcase_add_test(tc_basic, test_trailing_cr_in_att_value); 6449 tcase_add_test(tc_basic, test_standalone_internal_entity); 6450 tcase_add_test(tc_basic, test_skipped_external_entity); 6451 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); 6452 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); 6453 tcase_add_test__ifdef_xml_dtd(tc_basic, 
test_param_entity_with_trailing_cr); 6454 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity); 6455 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2); 6456 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3); 6457 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4); 6458 tcase_add_test(tc_basic, test_pi_handled_in_default); 6459 tcase_add_test(tc_basic, test_comment_handled_in_default); 6460 tcase_add_test(tc_basic, test_pi_yml); 6461 tcase_add_test(tc_basic, test_pi_xnl); 6462 tcase_add_test(tc_basic, test_pi_xmm); 6463 tcase_add_test(tc_basic, test_utf16_pi); 6464 tcase_add_test(tc_basic, test_utf16_be_pi); 6465 tcase_add_test(tc_basic, test_utf16_be_comment); 6466 tcase_add_test(tc_basic, test_utf16_le_comment); 6467 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); 6468 tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); 6469 tcase_add_test(tc_basic, test_unknown_encoding_success); 6470 tcase_add_test(tc_basic, test_unknown_encoding_bad_name); 6471 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); 6472 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); 6473 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); 6474 tcase_add_test(tc_basic, test_invalid_unknown_encoding); 6475 tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); 6476 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); 6477 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); 6478 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); 6479 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); 6480 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); 6481 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); 6482 tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary); 6483 tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary); 6484 tcase_add_test__if_xml_ge(tc_basic, 
test_ext_entity_latin1_utf16le_bom); 6485 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); 6486 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); 6487 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2); 6488 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be); 6489 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le); 6490 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown); 6491 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom); 6492 tcase_add_test(tc_basic, test_utf8_in_cdata_section); 6493 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); 6494 tcase_add_test(tc_basic, test_utf8_in_start_tags); 6495 tcase_add_test(tc_basic, test_trailing_spaces_in_elements); 6496 tcase_add_test(tc_basic, test_utf16_attribute); 6497 tcase_add_test(tc_basic, test_utf16_second_attr); 6498 tcase_add_test(tc_basic, test_attr_after_solidus); 6499 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe); 6500 tcase_add_test(tc_basic, test_bad_attr_desc_keyword); 6501 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); 6502 tcase_add_test(tc_basic, test_bad_doctype); 6503 tcase_add_test(tc_basic, test_bad_doctype_utf8); 6504 tcase_add_test(tc_basic, test_bad_doctype_utf16); 6505 tcase_add_test(tc_basic, test_bad_doctype_plus); 6506 tcase_add_test(tc_basic, test_bad_doctype_star); 6507 tcase_add_test(tc_basic, test_bad_doctype_query); 6508 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore); 6509 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); 6510 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); 6511 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be); 6512 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le); 6513 tcase_add_test(tc_basic, test_short_doctype); 6514 tcase_add_test(tc_basic, test_short_doctype_2); 6515 tcase_add_test(tc_basic, test_short_doctype_3); 6516 
tcase_add_test(tc_basic, test_long_doctype); 6517 tcase_add_test(tc_basic, test_bad_entity); 6518 tcase_add_test(tc_basic, test_bad_entity_2); 6519 tcase_add_test(tc_basic, test_bad_entity_3); 6520 tcase_add_test(tc_basic, test_bad_entity_4); 6521 tcase_add_test(tc_basic, test_bad_notation); 6522 tcase_add_test(tc_basic, test_default_doctype_handler); 6523 tcase_add_test(tc_basic, test_empty_element_abort); 6524 tcase_add_test__ifdef_xml_dtd(tc_basic, 6525 test_pool_integrity_with_unfinished_attr); 6526 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value); 6527 tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements); 6528 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity); 6529 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity); 6530 tcase_add_test__if_xml_ge(tc_basic, 6531 test_deep_nested_entity_delayed_interpretation); 6532 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); 6533 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2); 6534 tcase_add_test(tc_basic, test_big_tokens_scale_linearly); 6535 tcase_add_test(tc_basic, test_set_reparse_deferral); 6536 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); 6537 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); 6538 tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); 6539 tcase_add_test(tc_basic, test_set_bad_reparse_option); 6540 tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); 6541 tcase_add_test(tc_basic, test_varying_buffer_fills); 6542 } 6543