1 /* Tests in the "basic" test case for the Expat test suite 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 15 Copyright (c) 2017 Joe Orton <jorton@redhat.com> 16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com> 20 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 22 Licensed under the MIT license: 23 24 Permission is hereby granted, free of charge, to any person obtaining 25 a copy of this software and associated documentation files (the 26 "Software"), to deal in the Software without restriction, including 27 without limitation the rights to use, copy, modify, merge, publish, 28 distribute, sublicense, and/or sell copies of the Software, and to permit 29 persons to whom the Software is furnished to do so, subject to the 30 following conditions: 31 32 The above copyright notice and this permission notice shall be included 33 in all copies or substantial portions of the Software. 34 35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 41 USE OR OTHER DEALINGS IN THE SOFTWARE. 42 */ 43 44 #if defined(NDEBUG) 45 # undef NDEBUG /* because test suite relies on assert(...) at the moment */ 46 #endif 47 48 #include <assert.h> 49 50 #include <stdio.h> 51 #include <string.h> 52 #include <time.h> 53 54 #if ! defined(__cplusplus) 55 # include <stdbool.h> 56 #endif 57 58 #include "expat_config.h" 59 60 #include "expat.h" 61 #include "internal.h" 62 #include "minicheck.h" 63 #include "structdata.h" 64 #include "common.h" 65 #include "dummy.h" 66 #include "handlers.h" 67 #include "siphash.h" 68 #include "basic_tests.h" 69 70 static void 71 basic_setup(void) { 72 g_parser = XML_ParserCreate(NULL); 73 if (g_parser == NULL) 74 fail("Parser not created."); 75 } 76 77 /* 78 * Character & encoding tests. 79 */ 80 81 START_TEST(test_nul_byte) { 82 char text[] = "<doc>\0</doc>"; 83 84 /* test that a NUL byte (in US-ASCII data) is an error */ 85 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 86 == XML_STATUS_OK) 87 fail("Parser did not report error on NUL-byte."); 88 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 89 xml_failure(g_parser); 90 } 91 END_TEST 92 93 START_TEST(test_u0000_char) { 94 /* test that a NUL byte (in US-ASCII data) is an error */ 95 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF, 96 "Parser did not report error on NUL-byte."); 97 } 98 END_TEST 99 100 START_TEST(test_siphash_self) { 101 if (! sip24_valid()) 102 fail("SipHash self-test failed"); 103 } 104 END_TEST 105 106 START_TEST(test_siphash_spec) { 107 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */ 108 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 109 "\x0a\x0b\x0c\x0d\x0e"; 110 const size_t len = sizeof(message) - 1; 111 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U); 112 struct siphash state; 113 struct sipkey key; 114 115 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 116 "\x0a\x0b\x0c\x0d\x0e\x0f"); 117 sip24_init(&state, &key); 118 119 /* Cover spread across calls */ 120 sip24_update(&state, message, 4); 121 sip24_update(&state, message + 4, len - 4); 122 123 /* Cover null length */ 124 sip24_update(&state, message, 0); 125 126 if (sip24_final(&state) != expected) 127 fail("sip24_final failed spec test\n"); 128 129 /* Cover wrapper */ 130 if (siphash24(message, len, &key) != expected) 131 fail("siphash24 failed spec test\n"); 132 } 133 END_TEST 134 135 START_TEST(test_bom_utf8) { 136 /* This test is really just making sure we don't core on a UTF-8 BOM. */ 137 const char *text = "\357\273\277<e/>"; 138 139 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 140 == XML_STATUS_ERROR) 141 xml_failure(g_parser); 142 } 143 END_TEST 144 145 START_TEST(test_bom_utf16_be) { 146 char text[] = "\376\377\0<\0e\0/\0>"; 147 148 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 149 == XML_STATUS_ERROR) 150 xml_failure(g_parser); 151 } 152 END_TEST 153 154 START_TEST(test_bom_utf16_le) { 155 char text[] = "\377\376<\0e\0/\0>\0"; 156 157 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 158 == XML_STATUS_ERROR) 159 xml_failure(g_parser); 160 } 161 END_TEST 162 163 START_TEST(test_nobom_utf16_le) { 164 char text[] = " \0<\0e\0/\0>\0"; 165 166 if (g_chunkSize == 1) { 167 // TODO: with just the first byte, we can't tell the difference between 168 // UTF-16-LE and UTF-8. Avoid the failure for now. 169 return; 170 } 171 172 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 173 == XML_STATUS_ERROR) 174 xml_failure(g_parser); 175 } 176 END_TEST 177 178 START_TEST(test_hash_collision) { 179 /* For full coverage of the lookup routine, we need to ensure a 180 * hash collision even though we can only tell that we have one 181 * through breakpoint debugging or coverage statistics. The 182 * following will cause a hash collision on machines with a 64-bit 183 * long type; others will have to experiment. The full coverage 184 * tests invoked from qa.sh usually provide a hash collision, but 185 * not always. This is an attempt to provide insurance. 186 */ 187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U) 188 const char *text 189 = "<doc>\n" 190 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n" 191 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n" 192 "<b5></b5><b6></b6><b7></b7><b8></b8>\n" 193 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n" 194 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n" 195 "<d8>This triggers the table growth and collides with b2</d8>\n" 196 "</doc>\n"; 197 198 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT); 199 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 200 == XML_STATUS_ERROR) 201 xml_failure(g_parser); 202 } 203 END_TEST 204 #undef COLLIDING_HASH_SALT 205 206 /* Regression test for SF bug #491986. */ 207 START_TEST(test_danish_latin1) { 208 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 209 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>"; 210 #ifdef XML_UNICODE 211 const XML_Char *expected 212 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5"); 213 #else 214 const XML_Char *expected 215 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); 216 #endif 217 run_character_check(text, expected); 218 } 219 END_TEST 220 221 /* Regression test for SF bug #514281. */ 222 START_TEST(test_french_charref_hexidecimal) { 223 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 224 "<doc>éèàçêÈ</doc>"; 225 #ifdef XML_UNICODE 226 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 227 #else 228 const XML_Char *expected 229 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 230 #endif 231 run_character_check(text, expected); 232 } 233 END_TEST 234 235 START_TEST(test_french_charref_decimal) { 236 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 237 "<doc>éèàçêÈ</doc>"; 238 #ifdef XML_UNICODE 239 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 240 #else 241 const XML_Char *expected 242 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 243 #endif 244 run_character_check(text, expected); 245 } 246 END_TEST 247 248 START_TEST(test_french_latin1) { 249 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 250 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>"; 251 #ifdef XML_UNICODE 252 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 253 #else 254 const XML_Char *expected 255 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 256 #endif 257 run_character_check(text, expected); 258 } 259 END_TEST 260 261 START_TEST(test_french_utf8) { 262 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 263 "<doc>\xC3\xA9</doc>"; 264 #ifdef XML_UNICODE 265 const XML_Char *expected = XCS("\x00e9"); 266 #else 267 const XML_Char *expected = XCS("\xC3\xA9"); 268 #endif 269 run_character_check(text, expected); 270 } 271 END_TEST 272 273 /* Regression test for SF bug #600479. 274 XXX There should be a test that exercises all legal XML Unicode 275 characters as PCDATA and attribute value content, and XML Name 276 characters as part of element and attribute names. 277 */ 278 START_TEST(test_utf8_false_rejection) { 279 const char *text = "<doc>\xEF\xBA\xBF</doc>"; 280 #ifdef XML_UNICODE 281 const XML_Char *expected = XCS("\xfebf"); 282 #else 283 const XML_Char *expected = XCS("\xEF\xBA\xBF"); 284 #endif 285 run_character_check(text, expected); 286 } 287 END_TEST 288 289 /* Regression test for SF bug #477667. 290 This test assures that any 8-bit character followed by a 7-bit 291 character will not be mistakenly interpreted as a valid UTF-8 292 sequence. 293 */ 294 START_TEST(test_illegal_utf8) { 295 char text[100]; 296 int i; 297 298 for (i = 128; i <= 255; ++i) { 299 snprintf(text, sizeof(text), "<e>%ccd</e>", i); 300 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 301 == XML_STATUS_OK) { 302 snprintf(text, sizeof(text), 303 "expected token error for '%c' (ordinal %d) in UTF-8 text", i, 304 i); 305 fail(text); 306 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 307 xml_failure(g_parser); 308 /* Reset the parser since we use the same parser repeatedly. */ 309 XML_ParserReset(g_parser, NULL); 310 } 311 } 312 END_TEST 313 314 /* Examples, not masks: */ 315 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ 316 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ 317 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */ 318 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ 319 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */ 320 321 START_TEST(test_utf8_auto_align) { 322 struct TestCase { 323 ptrdiff_t expectedMovementInChars; 324 const char *input; 325 }; 326 327 struct TestCase cases[] = { 328 {00, ""}, 329 330 {00, UTF8_LEAD_1}, 331 332 {-1, UTF8_LEAD_2}, 333 {00, UTF8_LEAD_2 UTF8_FOLLOW}, 334 335 {-1, UTF8_LEAD_3}, 336 {-2, UTF8_LEAD_3 UTF8_FOLLOW}, 337 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, 338 339 {-1, UTF8_LEAD_4}, 340 {-2, UTF8_LEAD_4 UTF8_FOLLOW}, 341 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, 342 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, 343 }; 344 345 size_t i = 0; 346 bool success = true; 347 for (; i < sizeof(cases) / sizeof(*cases); i++) { 348 const char *fromLim = cases[i].input + strlen(cases[i].input); 349 const char *const fromLimInitially = fromLim; 350 ptrdiff_t actualMovementInChars; 351 352 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim); 353 354 actualMovementInChars = (fromLim - fromLimInitially); 355 if (actualMovementInChars != cases[i].expectedMovementInChars) { 356 size_t j = 0; 357 success = false; 358 printf("[-] UTF-8 case %2u: Expected movement by %2d chars" 359 ", actually moved by %2d chars: \"", 360 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars, 361 (int)actualMovementInChars); 362 for (; j < strlen(cases[i].input); j++) { 363 printf("\\x%02x", (unsigned char)cases[i].input[j]); 364 } 365 printf("\"\n"); 366 } 367 } 368 369 if (! success) { 370 fail("UTF-8 auto-alignment is not bullet-proof\n"); 371 } 372 } 373 END_TEST 374 375 START_TEST(test_utf16) { 376 /* <?xml version="1.0" encoding="UTF-16"?> 377 * <doc a='123'>some {A} text</doc> 378 * 379 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A 380 */ 381 char text[] 382 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" 383 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" 384 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" 385 "\000'\000?\000>\000\n" 386 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" 387 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" 388 "<\000/\000d\000o\000c\000>"; 389 #ifdef XML_UNICODE 390 const XML_Char *expected = XCS("some \xff21 text"); 391 #else 392 const XML_Char *expected = XCS("some \357\274\241 text"); 393 #endif 394 CharData storage; 395 396 CharData_Init(&storage); 397 XML_SetUserData(g_parser, &storage); 398 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 399 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 400 == XML_STATUS_ERROR) 401 xml_failure(g_parser); 402 CharData_CheckXMLChars(&storage, expected); 403 } 404 END_TEST 405 406 START_TEST(test_utf16_le_epilog_newline) { 407 unsigned int first_chunk_bytes = 17; 408 char text[] = "\xFF\xFE" /* BOM */ 409 "<\000e\000/\000>\000" /* document element */ 410 "\r\000\n\000\r\000\n\000"; /* epilog */ 411 412 if (first_chunk_bytes >= sizeof(text) - 1) 413 fail("bad value of first_chunk_bytes"); 414 if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE) 415 == XML_STATUS_ERROR) 416 xml_failure(g_parser); 417 else { 418 enum XML_Status rc; 419 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes, 420 sizeof(text) - first_chunk_bytes - 1, 421 XML_TRUE); 422 if (rc == XML_STATUS_ERROR) 423 xml_failure(g_parser); 424 } 425 } 426 END_TEST 427 428 /* Test that an outright lie in the encoding is faulted */ 429 START_TEST(test_not_utf16) { 430 const char *text = "<?xml version='1.0' encoding='utf-16'?>" 431 "<doc>Hi</doc>"; 432 433 /* Use a handler to provoke the appropriate code paths */ 434 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler); 435 expect_failure(text, XML_ERROR_INCORRECT_ENCODING, 436 "UTF-16 declared in UTF-8 not faulted"); 437 } 438 END_TEST 439 440 /* Test that an unknown encoding is rejected */ 441 START_TEST(test_bad_encoding) { 442 const char *text = "<doc>Hi</doc>"; 443 444 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding"))) 445 fail("XML_SetEncoding failed"); 446 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 447 "Unknown encoding not faulted"); 448 } 449 END_TEST 450 451 /* Regression test for SF bug #481609, #774028. */ 452 START_TEST(test_latin1_umlauts) { 453 const char *text 454 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 455 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n" 456 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>"; 457 #ifdef XML_UNICODE 458 /* Expected results in UTF-16 */ 459 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ") 460 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >"); 461 #else 462 /* Expected results in UTF-8 */ 463 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") 464 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >"); 465 #endif 466 467 run_character_check(text, expected); 468 XML_ParserReset(g_parser, NULL); 469 run_attribute_check(text, expected); 470 /* Repeat with a default handler */ 471 XML_ParserReset(g_parser, NULL); 472 XML_SetDefaultHandler(g_parser, dummy_default_handler); 473 run_character_check(text, expected); 474 XML_ParserReset(g_parser, NULL); 475 XML_SetDefaultHandler(g_parser, dummy_default_handler); 476 run_attribute_check(text, expected); 477 } 478 END_TEST 479 480 /* Test that an element name with a 4-byte UTF-8 character is rejected */ 481 START_TEST(test_long_utf8_character) { 482 const char *text 483 = "<?xml version='1.0' encoding='utf-8'?>\n" 484 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ 485 "<do\xf0\x90\x80\x80/>"; 486 expect_failure(text, XML_ERROR_INVALID_TOKEN, 487 "4-byte UTF-8 character in element name not faulted"); 488 } 489 END_TEST 490 491 /* Test that a long latin-1 attribute (too long to convert in one go) 492 * is correctly converted 493 */ 494 START_TEST(test_long_latin1_attribute) { 495 const char *text 496 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 497 "<doc att='" 498 /* 64 characters per line */ 499 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" 515 /* Last character splits across a buffer boundary */ 516 "\xe4'>\n</doc>"; 517 518 const XML_Char *expected = 519 /* 64 characters per line */ 520 /* clang-format off */ 521 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 523 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 524 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO") 537 /* clang-format on */ 538 #ifdef XML_UNICODE 539 XCS("\x00e4"); 540 #else 541 XCS("\xc3\xa4"); 542 #endif 543 544 run_attribute_check(text, expected); 545 } 546 END_TEST 547 548 /* Test that a long ASCII attribute (too long to convert in one go) 549 * is correctly converted 550 */ 551 START_TEST(test_long_ascii_attribute) { 552 const char *text 553 = "<?xml version='1.0' encoding='us-ascii'?>\n" 554 "<doc att='" 555 /* 64 characters per line */ 556 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 562 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 563 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 572 "01234'>\n</doc>"; 573 const XML_Char *expected = 574 /* 64 characters per line */ 575 /* clang-format off */ 576 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 592 XCS("01234"); 593 /* clang-format on */ 594 595 run_attribute_check(text, expected); 596 } 597 END_TEST 598 599 /* Regression test #1 for SF bug #653180. */ 600 START_TEST(test_line_number_after_parse) { 601 const char *text = "<tag>\n" 602 "\n" 603 "\n</tag>"; 604 XML_Size lineno; 605 606 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 607 == XML_STATUS_ERROR) 608 xml_failure(g_parser); 609 lineno = XML_GetCurrentLineNumber(g_parser); 610 if (lineno != 4) { 611 char buffer[100]; 612 snprintf(buffer, sizeof(buffer), 613 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); 614 fail(buffer); 615 } 616 } 617 END_TEST 618 619 /* Regression test #2 for SF bug #653180. */ 620 START_TEST(test_column_number_after_parse) { 621 const char *text = "<tag></tag>"; 622 XML_Size colno; 623 624 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 625 == XML_STATUS_ERROR) 626 xml_failure(g_parser); 627 colno = XML_GetCurrentColumnNumber(g_parser); 628 if (colno != 11) { 629 char buffer[100]; 630 snprintf(buffer, sizeof(buffer), 631 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); 632 fail(buffer); 633 } 634 } 635 END_TEST 636 637 /* Regression test #3 for SF bug #653180. */ 638 START_TEST(test_line_and_column_numbers_inside_handlers) { 639 const char *text = "<a>\n" /* Unix end-of-line */ 640 " <b>\r\n" /* Windows end-of-line */ 641 " <c/>\r" /* Mac OS end-of-line */ 642 " </b>\n" 643 " <d>\n" 644 " <f/>\n" 645 " </d>\n" 646 "</a>"; 647 const StructDataEntry expected[] 648 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG}, 649 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG}, 650 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG}, 651 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG}, 652 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}}; 653 const int expected_count = sizeof(expected) / sizeof(StructDataEntry); 654 StructData storage; 655 656 StructData_Init(&storage); 657 XML_SetUserData(g_parser, &storage); 658 XML_SetStartElementHandler(g_parser, start_element_event_handler2); 659 XML_SetEndElementHandler(g_parser, end_element_event_handler2); 660 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 661 == XML_STATUS_ERROR) 662 xml_failure(g_parser); 663 664 StructData_CheckItems(&storage, expected, expected_count); 665 StructData_Dispose(&storage); 666 } 667 END_TEST 668 669 /* Regression test #4 for SF bug #653180. */ 670 START_TEST(test_line_number_after_error) { 671 const char *text = "<a>\n" 672 " <b>\n" 673 " </a>"; /* missing </b> */ 674 XML_Size lineno; 675 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 676 != XML_STATUS_ERROR) 677 fail("Expected a parse error"); 678 679 lineno = XML_GetCurrentLineNumber(g_parser); 680 if (lineno != 3) { 681 char buffer[100]; 682 snprintf(buffer, sizeof(buffer), 683 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); 684 fail(buffer); 685 } 686 } 687 END_TEST 688 689 /* Regression test #5 for SF bug #653180. */ 690 START_TEST(test_column_number_after_error) { 691 const char *text = "<a>\n" 692 " <b>\n" 693 " </a>"; /* missing </b> */ 694 XML_Size colno; 695 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 696 != XML_STATUS_ERROR) 697 fail("Expected a parse error"); 698 699 colno = XML_GetCurrentColumnNumber(g_parser); 700 if (colno != 4) { 701 char buffer[100]; 702 snprintf(buffer, sizeof(buffer), 703 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); 704 fail(buffer); 705 } 706 } 707 END_TEST 708 709 /* Regression test for SF bug #478332. */ 710 START_TEST(test_really_long_lines) { 711 /* This parses an input line longer than INIT_DATA_BUF_SIZE 712 characters long (defined to be 1024 in xmlparse.c). We take a 713 really cheesy approach to building the input buffer, because 714 this avoids writing bugs in buffer-filling code. 715 */ 716 const char *text 717 = "<e>" 718 /* 64 chars */ 719 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 720 /* until we have at least 1024 characters on the line: */ 721 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 734 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 735 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 737 "</e>"; 738 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 739 == XML_STATUS_ERROR) 740 xml_failure(g_parser); 741 } 742 END_TEST 743 744 /* Test cdata processing across a buffer boundary */ 745 START_TEST(test_really_long_encoded_lines) { 746 /* As above, except that we want to provoke an output buffer 747 * overflow with a non-trivial encoding. For this we need to pass 748 * the whole cdata in one go, not byte-by-byte. 749 */ 750 void *buffer; 751 const char *text 752 = "<?xml version='1.0' encoding='iso-8859-1'?>" 753 "<e>" 754 /* 64 chars */ 755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 756 /* until we have at least 1024 characters on the line: */ 757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 772 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 773 "</e>"; 774 int parse_len = (int)strlen(text); 775 776 /* Need a cdata handler to provoke the code path we want to test */ 777 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler); 778 buffer = XML_GetBuffer(g_parser, parse_len); 779 if (buffer == NULL) 780 fail("Could not allocate parse buffer"); 781 assert(buffer != NULL); 782 memcpy(buffer, text, parse_len); 783 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) 784 xml_failure(g_parser); 785 } 786 END_TEST 787 788 /* 789 * Element event tests. 790 */ 791 792 START_TEST(test_end_element_events) { 793 const char *text = "<a><b><c/></b><d><f/></d></a>"; 794 const XML_Char *expected = XCS("/c/b/f/d/a"); 795 CharData storage; 796 797 CharData_Init(&storage); 798 XML_SetUserData(g_parser, &storage); 799 XML_SetEndElementHandler(g_parser, end_element_event_handler); 800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 801 == XML_STATUS_ERROR) 802 xml_failure(g_parser); 803 CharData_CheckXMLChars(&storage, expected); 804 } 805 END_TEST 806 807 /* 808 * Attribute tests. 809 */ 810 811 /* Helper used by the following tests; this checks any "attr" and "refs" 812 attributes to make sure whitespace has been normalized. 813 814 Return true if whitespace has been normalized in a string, using 815 the rules for attribute value normalization. The 'is_cdata' flag 816 is needed since CDATA attributes don't need to have multiple 817 whitespace characters collapsed to a single space, while other 818 attribute data types do. (Section 3.3.3 of the recommendation.) 819 */ 820 static int 821 is_whitespace_normalized(const XML_Char *s, int is_cdata) { 822 int blanks = 0; 823 int at_start = 1; 824 while (*s) { 825 if (*s == XCS(' ')) 826 ++blanks; 827 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r')) 828 return 0; 829 else { 830 if (at_start) { 831 at_start = 0; 832 if (blanks && ! is_cdata) 833 /* illegal leading blanks */ 834 return 0; 835 } else if (blanks > 1 && ! is_cdata) 836 return 0; 837 blanks = 0; 838 } 839 ++s; 840 } 841 if (blanks && ! is_cdata) 842 return 0; 843 return 1; 844 } 845 846 /* Check the attribute whitespace checker: */ 847 START_TEST(test_helper_is_whitespace_normalized) { 848 assert(is_whitespace_normalized(XCS("abc"), 0)); 849 assert(is_whitespace_normalized(XCS("abc"), 1)); 850 assert(is_whitespace_normalized(XCS("abc def ghi"), 0)); 851 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 852 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0)); 853 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1)); 854 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0)); 855 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 856 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0)); 857 assert(is_whitespace_normalized(XCS("abc def ghi "), 1)); 858 assert(! is_whitespace_normalized(XCS(" "), 0)); 859 assert(is_whitespace_normalized(XCS(" "), 1)); 860 assert(! is_whitespace_normalized(XCS("\t"), 0)); 861 assert(! is_whitespace_normalized(XCS("\t"), 1)); 862 assert(! is_whitespace_normalized(XCS("\n"), 0)); 863 assert(! is_whitespace_normalized(XCS("\n"), 1)); 864 assert(! is_whitespace_normalized(XCS("\r"), 0)); 865 assert(! is_whitespace_normalized(XCS("\r"), 1)); 866 assert(! is_whitespace_normalized(XCS("abc\t def"), 1)); 867 } 868 END_TEST 869 870 static void XMLCALL 871 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, 872 const XML_Char **atts) { 873 int i; 874 UNUSED_P(userData); 875 UNUSED_P(name); 876 for (i = 0; atts[i] != NULL; i += 2) { 877 const XML_Char *attrname = atts[i]; 878 const XML_Char *value = atts[i + 1]; 879 if (xcstrcmp(XCS("attr"), attrname) == 0 880 || xcstrcmp(XCS("ents"), attrname) == 0 881 || xcstrcmp(XCS("refs"), attrname) == 0) { 882 if (! is_whitespace_normalized(value, 0)) { 883 char buffer[256]; 884 snprintf(buffer, sizeof(buffer), 885 "attribute value not normalized: %" XML_FMT_STR 886 "='%" XML_FMT_STR "'", 887 attrname, value); 888 fail(buffer); 889 } 890 } 891 } 892 } 893 894 START_TEST(test_attr_whitespace_normalization) { 895 const char *text 896 = "<!DOCTYPE doc [\n" 897 " <!ATTLIST doc\n" 898 " attr NMTOKENS #REQUIRED\n" 899 " ents ENTITIES #REQUIRED\n" 900 " refs IDREFS #REQUIRED>\n" 901 "]>\n" 902 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n" 903 " ents=' ent-1 \t\r\n" 904 " ent-2 ' >\n" 905 " <e id='id-1'/>\n" 906 " <e id='id-2'/>\n" 907 "</doc>"; 908 909 XML_SetStartElementHandler(g_parser, 910 check_attr_contains_normalized_whitespace); 911 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 912 == XML_STATUS_ERROR) 913 xml_failure(g_parser); 914 } 915 END_TEST 916 917 /* 918 * XML declaration tests. 919 */ 920 921 START_TEST(test_xmldecl_misplaced) { 922 expect_failure("\n" 923 "<?xml version='1.0'?>\n" 924 "<a/>", 925 XML_ERROR_MISPLACED_XML_PI, 926 "failed to report misplaced XML declaration"); 927 } 928 END_TEST 929 930 START_TEST(test_xmldecl_invalid) { 931 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL, 932 "Failed to report invalid XML declaration"); 933 } 934 END_TEST 935 936 START_TEST(test_xmldecl_missing_attr) { 937 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL, 938 "Failed to report missing XML declaration attribute"); 939 } 940 END_TEST 941 942 START_TEST(test_xmldecl_missing_value) { 943 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n" 944 "<doc/>", 945 XML_ERROR_XML_DECL, 946 "Failed to report missing attribute value"); 947 } 948 END_TEST 949 950 /* Regression test for SF bug #584832. */ 951 START_TEST(test_unknown_encoding_internal_entity) { 952 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 953 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 954 "<test a='&foo;'/>"; 955 956 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL); 957 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 958 == XML_STATUS_ERROR) 959 xml_failure(g_parser); 960 } 961 END_TEST 962 963 /* Test unrecognised encoding handler */ 964 START_TEST(test_unrecognised_encoding_internal_entity) { 965 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 966 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 967 "<test a='&foo;'/>"; 968 969 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL); 970 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 971 != XML_STATUS_ERROR) 972 fail("Unrecognised encoding not rejected"); 973 } 974 END_TEST 975 976 /* Regression test for SF bug #620106. */ 977 START_TEST(test_ext_entity_set_encoding) { 978 const char *text = "<!DOCTYPE doc [\n" 979 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 980 "]>\n" 981 "<doc>&en;</doc>"; 982 ExtTest test_data 983 = {/* This text says it's an unsupported encoding, but it's really 984 UTF-8, which we tell Expat using XML_SetEncoding(). 985 */ 986 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL}; 987 #ifdef XML_UNICODE 988 const XML_Char *expected = XCS("\x00e9"); 989 #else 990 const XML_Char *expected = XCS("\xc3\xa9"); 991 #endif 992 993 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 994 run_ext_character_check(text, &test_data, expected); 995 } 996 END_TEST 997 998 /* Test external entities with no handler */ 999 START_TEST(test_ext_entity_no_handler) { 1000 const char *text = "<!DOCTYPE doc [\n" 1001 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1002 "]>\n" 1003 "<doc>&en;</doc>"; 1004 1005 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1006 run_character_check(text, XCS("")); 1007 } 1008 END_TEST 1009 1010 /* Test UTF-8 BOM is accepted */ 1011 START_TEST(test_ext_entity_set_bom) { 1012 const char *text = "<!DOCTYPE doc [\n" 1013 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1014 "]>\n" 1015 "<doc>&en;</doc>"; 1016 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */ 1017 "<?xml encoding='iso-8859-3'?>" 1018 "\xC3\xA9", 1019 XCS("utf-8"), NULL}; 1020 #ifdef XML_UNICODE 1021 const XML_Char *expected = XCS("\x00e9"); 1022 #else 1023 const XML_Char *expected = XCS("\xc3\xa9"); 1024 #endif 1025 1026 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1027 run_ext_character_check(text, &test_data, expected); 1028 } 1029 END_TEST 1030 1031 /* Test that bad encodings are faulted */ 1032 START_TEST(test_ext_entity_bad_encoding) { 1033 const char *text = "<!DOCTYPE doc [\n" 1034 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1035 "]>\n" 1036 "<doc>&en;</doc>"; 1037 ExtFaults fault 1038 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted", 1039 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING}; 1040 1041 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1042 XML_SetUserData(g_parser, &fault); 1043 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1044 "Bad encoding should not have been accepted"); 1045 } 1046 END_TEST 1047 1048 /* Try handing an invalid encoding to an external entity parser */ 1049 START_TEST(test_ext_entity_bad_encoding_2) { 1050 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1051 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1052 "<doc>&entity;</doc>"; 1053 ExtFaults fault 1054 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted", 1055 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING}; 1056 1057 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1058 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1059 XML_SetUserData(g_parser, &fault); 1060 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1061 "Bad encoding not faulted in external entity handler"); 1062 } 1063 END_TEST 1064 1065 /* Test that no error is reported for unknown entities if we don't 1066 read an external subset. This was fixed in Expat 1.95.5. 1067 */ 1068 START_TEST(test_wfc_undeclared_entity_unread_external_subset) { 1069 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 1070 "<doc>&entity;</doc>"; 1071 1072 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1073 == XML_STATUS_ERROR) 1074 xml_failure(g_parser); 1075 } 1076 END_TEST 1077 1078 /* Test that an error is reported for unknown entities if we don't 1079 have an external subset. 1080 */ 1081 START_TEST(test_wfc_undeclared_entity_no_external_subset) { 1082 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY, 1083 "Parser did not report undefined entity w/out a DTD."); 1084 } 1085 END_TEST 1086 1087 /* Test that an error is reported for unknown entities if we don't 1088 read an external subset, but have been declared standalone. 1089 */ 1090 START_TEST(test_wfc_undeclared_entity_standalone) { 1091 const char *text 1092 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1093 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1094 "<doc>&entity;</doc>"; 1095 1096 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1097 "Parser did not report undefined entity (standalone)."); 1098 } 1099 END_TEST 1100 1101 /* Test that an error is reported for unknown entities if we have read 1102 an external subset, and standalone is true. 1103 */ 1104 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { 1105 const char *text 1106 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1107 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1108 "<doc>&entity;</doc>"; 1109 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1110 1111 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1112 XML_SetUserData(g_parser, &test_data); 1113 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1114 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1115 "Parser did not report undefined entity (external DTD)."); 1116 } 1117 END_TEST 1118 1119 /* Test that external entity handling is not done if the parsing flag 1120 * is set to UNLESS_STANDALONE 1121 */ 1122 START_TEST(test_entity_with_external_subset_unless_standalone) { 1123 const char *text 1124 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1125 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1126 "<doc>&entity;</doc>"; 1127 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL}; 1128 1129 XML_SetParamEntityParsing(g_parser, 1130 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1131 XML_SetUserData(g_parser, &test_data); 1132 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1133 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1134 "Parser did not report undefined entity"); 1135 } 1136 END_TEST 1137 1138 /* Test that no error is reported for unknown entities if we have read 1139 an external subset, and standalone is false. 1140 */ 1141 START_TEST(test_wfc_undeclared_entity_with_external_subset) { 1142 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1143 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1144 "<doc>&entity;</doc>"; 1145 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1146 1147 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1148 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1149 run_ext_character_check(text, &test_data, XCS("")); 1150 } 1151 END_TEST 1152 1153 /* Test that an error is reported if our NotStandalone handler fails */ 1154 START_TEST(test_not_standalone_handler_reject) { 1155 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1156 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1157 "<doc>&entity;</doc>"; 1158 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1159 1160 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1161 XML_SetUserData(g_parser, &test_data); 1162 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1163 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1164 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1165 "NotStandalone handler failed to reject"); 1166 1167 /* Try again but without external entity handling */ 1168 XML_ParserReset(g_parser, NULL); 1169 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1170 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1171 "NotStandalone handler failed to reject"); 1172 } 1173 END_TEST 1174 1175 /* Test that no error is reported if our NotStandalone handler succeeds */ 1176 START_TEST(test_not_standalone_handler_accept) { 1177 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1178 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1179 "<doc>&entity;</doc>"; 1180 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1181 1182 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1183 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1184 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1185 run_ext_character_check(text, &test_data, XCS("")); 1186 1187 /* Repeat without the external entity handler */ 1188 XML_ParserReset(g_parser, NULL); 1189 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1190 run_character_check(text, XCS("")); 1191 } 1192 END_TEST 1193 1194 START_TEST(test_wfc_no_recursive_entity_refs) { 1195 const char *text = "<!DOCTYPE doc [\n" 1196 " <!ENTITY entity '&entity;'>\n" 1197 "]>\n" 1198 "<doc>&entity;</doc>"; 1199 1200 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, 1201 "Parser did not report recursive entity reference."); 1202 } 1203 END_TEST 1204 1205 /* Test incomplete external entities are faulted */ 1206 START_TEST(test_ext_entity_invalid_parse) { 1207 const char *text = "<!DOCTYPE doc [\n" 1208 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1209 "]>\n" 1210 "<doc>&en;</doc>"; 1211 const ExtFaults faults[] 1212 = {{"<", "Incomplete element declaration not faulted", NULL, 1213 XML_ERROR_UNCLOSED_TOKEN}, 1214 {"<\xe2\x82", /* First two bytes of a three-byte char */ 1215 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 1216 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL, 1217 XML_ERROR_PARTIAL_CHAR}, 1218 {NULL, NULL, NULL, XML_ERROR_NONE}}; 1219 const ExtFaults *fault = faults; 1220 1221 for (; fault->parse_text != NULL; fault++) { 1222 set_subtest("\"%s\"", fault->parse_text); 1223 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1224 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1225 XML_SetUserData(g_parser, (void *)fault); 1226 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1227 "Parser did not report external entity error"); 1228 XML_ParserReset(g_parser, NULL); 1229 } 1230 } 1231 END_TEST 1232 1233 /* Regression test for SF bug #483514. */ 1234 START_TEST(test_dtd_default_handling) { 1235 const char *text = "<!DOCTYPE doc [\n" 1236 "<!ENTITY e SYSTEM 'http://example.org/e'>\n" 1237 "<!NOTATION n SYSTEM 'http://example.org/n'>\n" 1238 "<!ELEMENT doc EMPTY>\n" 1239 "<!ATTLIST doc a CDATA #IMPLIED>\n" 1240 "<?pi in dtd?>\n" 1241 "<!--comment in dtd-->\n" 1242 "]><doc/>"; 1243 1244 XML_SetDefaultHandler(g_parser, accumulate_characters); 1245 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 1246 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 1247 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 1248 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 1249 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 1250 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 1251 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); 1252 XML_SetCommentHandler(g_parser, dummy_comment_handler); 1253 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1254 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1255 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>")); 1256 } 1257 END_TEST 1258 1259 /* Test handling of attribute declarations */ 1260 START_TEST(test_dtd_attr_handling) { 1261 const char *prolog = "<!DOCTYPE doc [\n" 1262 "<!ELEMENT doc EMPTY>\n"; 1263 AttTest attr_data[] 1264 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n" 1265 "]>" 1266 "<doc a='two'/>", 1267 XCS("doc"), XCS("a"), 1268 XCS("(one|two|three)"), /* Extraneous spaces will be removed */ 1269 NULL, XML_TRUE}, 1270 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n" 1271 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n" 1272 "]>" 1273 "<doc/>", 1274 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE}, 1275 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n" 1276 "]>" 1277 "<doc/>", 1278 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE}, 1279 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n" 1280 "]>" 1281 "<doc/>", 1282 XCS("doc"), XCS("a"), XCS("CDATA"), 1283 #ifdef XML_UNICODE 1284 XCS("\x06f2"), 1285 #else 1286 XCS("\xdb\xb2"), 1287 #endif 1288 XML_FALSE}, 1289 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}}; 1290 AttTest *test; 1291 1292 for (test = attr_data; test->definition != NULL; test++) { 1293 set_subtest("%s", test->definition); 1294 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); 1295 XML_SetUserData(g_parser, test); 1296 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), 1297 XML_FALSE) 1298 == XML_STATUS_ERROR) 1299 xml_failure(g_parser); 1300 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition, 1301 (int)strlen(test->definition), XML_TRUE) 1302 == XML_STATUS_ERROR) 1303 xml_failure(g_parser); 1304 XML_ParserReset(g_parser, NULL); 1305 } 1306 } 1307 END_TEST 1308 1309 /* See related SF bug #673791. 1310 When namespace processing is enabled, setting the namespace URI for 1311 a prefix is not allowed; this test ensures that it *is* allowed 1312 when namespace processing is not enabled. 1313 (See Namespaces in XML, section 2.) 1314 */ 1315 START_TEST(test_empty_ns_without_namespaces) { 1316 const char *text = "<doc xmlns:prefix='http://example.org/'>\n" 1317 " <e xmlns:prefix=''/>\n" 1318 "</doc>"; 1319 1320 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1321 == XML_STATUS_ERROR) 1322 xml_failure(g_parser); 1323 } 1324 END_TEST 1325 1326 /* Regression test for SF bug #824420. 1327 Checks that an xmlns:prefix attribute set in an attribute's default 1328 value isn't misinterpreted. 1329 */ 1330 START_TEST(test_ns_in_attribute_default_without_namespaces) { 1331 const char *text = "<!DOCTYPE e:element [\n" 1332 " <!ATTLIST e:element\n" 1333 " xmlns:e CDATA 'http://example.org/'>\n" 1334 " ]>\n" 1335 "<e:element/>"; 1336 1337 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1338 == XML_STATUS_ERROR) 1339 xml_failure(g_parser); 1340 } 1341 END_TEST 1342 1343 /* Regression test for SF bug #1515266: missing check of stopped 1344 parser in doContext() 'for' loop. */ 1345 START_TEST(test_stop_parser_between_char_data_calls) { 1346 /* The sample data must be big enough that there are two calls to 1347 the character data handler from within the inner "for" loop of 1348 the XML_TOK_DATA_CHARS case in doContent(), and the character 1349 handler must stop the parser and clear the character data 1350 handler. 1351 */ 1352 const char *text = long_character_data_text; 1353 1354 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1355 g_resumable = XML_FALSE; 1356 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1357 != XML_STATUS_ERROR) 1358 xml_failure(g_parser); 1359 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 1360 xml_failure(g_parser); 1361 } 1362 END_TEST 1363 1364 /* Regression test for SF bug #1515266: missing check of stopped 1365 parser in doContext() 'for' loop. */ 1366 START_TEST(test_suspend_parser_between_char_data_calls) { 1367 /* The sample data must be big enough that there are two calls to 1368 the character data handler from within the inner "for" loop of 1369 the XML_TOK_DATA_CHARS case in doContent(), and the character 1370 handler must stop the parser and clear the character data 1371 handler. 1372 */ 1373 const char *text = long_character_data_text; 1374 1375 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1376 g_resumable = XML_TRUE; 1377 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1378 != XML_STATUS_SUSPENDED) 1379 xml_failure(g_parser); 1380 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1381 xml_failure(g_parser); 1382 /* Try parsing directly */ 1383 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1384 != XML_STATUS_ERROR) 1385 fail("Attempt to continue parse while suspended not faulted"); 1386 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 1387 fail("Suspended parse not faulted with correct error"); 1388 } 1389 END_TEST 1390 1391 /* Test repeated calls to XML_StopParser are handled correctly */ 1392 START_TEST(test_repeated_stop_parser_between_char_data_calls) { 1393 const char *text = long_character_data_text; 1394 1395 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1396 g_resumable = XML_FALSE; 1397 g_abortable = XML_FALSE; 1398 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1399 != XML_STATUS_ERROR) 1400 fail("Failed to double-stop parser"); 1401 1402 XML_ParserReset(g_parser, NULL); 1403 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1404 g_resumable = XML_TRUE; 1405 g_abortable = XML_FALSE; 1406 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1407 != XML_STATUS_SUSPENDED) 1408 fail("Failed to double-suspend parser"); 1409 1410 XML_ParserReset(g_parser, NULL); 1411 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1412 g_resumable = XML_TRUE; 1413 g_abortable = XML_TRUE; 1414 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1415 != XML_STATUS_ERROR) 1416 fail("Failed to suspend-abort parser"); 1417 } 1418 END_TEST 1419 1420 START_TEST(test_good_cdata_ascii) { 1421 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>"; 1422 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>"); 1423 1424 CharData storage; 1425 CharData_Init(&storage); 1426 XML_SetUserData(g_parser, &storage); 1427 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1428 /* Add start and end handlers for coverage */ 1429 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1430 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1431 1432 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1433 == XML_STATUS_ERROR) 1434 xml_failure(g_parser); 1435 CharData_CheckXMLChars(&storage, expected); 1436 1437 /* Try again, this time with a default handler */ 1438 XML_ParserReset(g_parser, NULL); 1439 CharData_Init(&storage); 1440 XML_SetUserData(g_parser, &storage); 1441 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1442 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1443 1444 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1445 == XML_STATUS_ERROR) 1446 xml_failure(g_parser); 1447 CharData_CheckXMLChars(&storage, expected); 1448 } 1449 END_TEST 1450 1451 START_TEST(test_good_cdata_utf16) { 1452 /* Test data is: 1453 * <?xml version='1.0' encoding='utf-16'?> 1454 * <a><![CDATA[hello]]></a> 1455 */ 1456 const char text[] 1457 = "\0<\0?\0x\0m\0l\0" 1458 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1459 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1460 "1\0" 1461 "6\0'" 1462 "\0?\0>\0\n" 1463 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; 1464 const XML_Char *expected = XCS("hello"); 1465 1466 CharData storage; 1467 CharData_Init(&storage); 1468 XML_SetUserData(g_parser, &storage); 1469 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1470 1471 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1472 == XML_STATUS_ERROR) 1473 xml_failure(g_parser); 1474 CharData_CheckXMLChars(&storage, expected); 1475 } 1476 END_TEST 1477 1478 START_TEST(test_good_cdata_utf16_le) { 1479 /* Test data is: 1480 * <?xml version='1.0' encoding='utf-16'?> 1481 * <a><![CDATA[hello]]></a> 1482 */ 1483 const char text[] 1484 = "<\0?\0x\0m\0l\0" 1485 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1486 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1487 "1\0" 1488 "6\0'" 1489 "\0?\0>\0\n" 1490 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; 1491 const XML_Char *expected = XCS("hello"); 1492 1493 CharData storage; 1494 CharData_Init(&storage); 1495 XML_SetUserData(g_parser, &storage); 1496 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1497 1498 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1499 == XML_STATUS_ERROR) 1500 xml_failure(g_parser); 1501 CharData_CheckXMLChars(&storage, expected); 1502 } 1503 END_TEST 1504 1505 /* Test UTF16 conversion of a long cdata string */ 1506 1507 /* 16 characters: handy macro to reduce visual clutter */ 1508 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" 1509 1510 START_TEST(test_long_cdata_utf16) { 1511 /* Test data is: 1512 * <?xlm version='1.0' encoding='utf-16'?> 1513 * <a><![CDATA[ 1514 * ABCDEFGHIJKLMNOP 1515 * ]]></a> 1516 */ 1517 const char text[] 1518 = "\0<\0?\0x\0m\0l\0 " 1519 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " 1520 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" 1521 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1522 /* 64 characters per line */ 1523 /* clang-format off */ 1524 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1525 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1526 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1527 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1528 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1529 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1530 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1531 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1532 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1533 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1534 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1535 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1536 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1537 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1538 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1539 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1540 A_TO_P_IN_UTF16 1541 /* clang-format on */ 1542 "\0]\0]\0>\0<\0/\0a\0>"; 1543 const XML_Char *expected = 1544 /* clang-format off */ 1545 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1546 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1547 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1548 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1549 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1550 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1551 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1552 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1553 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1554 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1555 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1556 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1557 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1558 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1559 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1560 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1561 XCS("ABCDEFGHIJKLMNOP"); 1562 /* clang-format on */ 1563 CharData storage; 1564 void *buffer; 1565 1566 CharData_Init(&storage); 1567 XML_SetUserData(g_parser, &storage); 1568 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1569 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1); 1570 if (buffer == NULL) 1571 fail("Could not allocate parse buffer"); 1572 assert(buffer != NULL); 1573 memcpy(buffer, text, sizeof(text) - 1); 1574 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) 1575 xml_failure(g_parser); 1576 CharData_CheckXMLChars(&storage, expected); 1577 } 1578 END_TEST 1579 1580 /* Test handling of multiple unit UTF-16 characters */ 1581 START_TEST(test_multichar_cdata_utf16) { 1582 /* Test data is: 1583 * <?xml version='1.0' encoding='utf-16'?> 1584 * <a><![CDATA[{MINIM}{CROTCHET}]]></a> 1585 * 1586 * where {MINIM} is U+1d15e (a minim or half-note) 1587 * UTF-16: 0xd834 0xdd5e 1588 * UTF-8: 0xf0 0x9d 0x85 0x9e 1589 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) 1590 * UTF-16: 0xd834 0xdd5f 1591 * UTF-8: 0xf0 0x9d 0x85 0x9f 1592 */ 1593 const char text[] = "\0<\0?\0x\0m\0l\0" 1594 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1595 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1596 "1\0" 1597 "6\0'" 1598 "\0?\0>\0\n" 1599 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1600 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" 1601 "\0]\0]\0>\0<\0/\0a\0>"; 1602 #ifdef XML_UNICODE 1603 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); 1604 #else 1605 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); 1606 #endif 1607 CharData storage; 1608 1609 CharData_Init(&storage); 1610 XML_SetUserData(g_parser, &storage); 1611 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1612 1613 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1614 == XML_STATUS_ERROR) 1615 xml_failure(g_parser); 1616 CharData_CheckXMLChars(&storage, expected); 1617 } 1618 END_TEST 1619 1620 /* Test that an element name with a UTF-16 surrogate pair is rejected */ 1621 START_TEST(test_utf16_bad_surrogate_pair) { 1622 /* Test data is: 1623 * <?xml version='1.0' encoding='utf-16'?> 1624 * <a><![CDATA[{BADLINB}]]></a> 1625 * 1626 * where {BADLINB} is U+10000 (the first Linear B character) 1627 * with the UTF-16 surrogate pair in the wrong order, i.e. 1628 * 0xdc00 0xd800 1629 */ 1630 const char text[] = "\0<\0?\0x\0m\0l\0" 1631 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1632 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1633 "1\0" 1634 "6\0'" 1635 "\0?\0>\0\n" 1636 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1637 "\xdc\x00\xd8\x00" 1638 "\0]\0]\0>\0<\0/\0a\0>"; 1639 1640 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1641 != XML_STATUS_ERROR) 1642 fail("Reversed UTF-16 surrogate pair not faulted"); 1643 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 1644 xml_failure(g_parser); 1645 } 1646 END_TEST 1647 1648 START_TEST(test_bad_cdata) { 1649 struct CaseData { 1650 const char *text; 1651 enum XML_Error expectedError; 1652 }; 1653 1654 struct CaseData cases[] 1655 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN}, 1656 {"<a><!", XML_ERROR_UNCLOSED_TOKEN}, 1657 {"<a><![", XML_ERROR_UNCLOSED_TOKEN}, 1658 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN}, 1659 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN}, 1660 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN}, 1661 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN}, 1662 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN}, 1663 1664 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1665 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1666 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1667 1668 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN}, 1669 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1670 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1671 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN}, 1672 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN}, 1673 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN}, 1674 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN}, 1675 1676 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1677 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1678 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1679 1680 size_t i = 0; 1681 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1682 set_subtest("%s", cases[i].text); 1683 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( 1684 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); 1685 const enum XML_Error actualError = XML_GetErrorCode(g_parser); 1686 1687 assert(actualStatus == XML_STATUS_ERROR); 1688 1689 if (actualError != cases[i].expectedError) { 1690 char message[100]; 1691 snprintf(message, sizeof(message), 1692 "Expected error %d but got error %d for case %u: \"%s\"\n", 1693 cases[i].expectedError, actualError, (unsigned int)i + 1, 1694 cases[i].text); 1695 fail(message); 1696 } 1697 1698 XML_ParserReset(g_parser, NULL); 1699 } 1700 } 1701 END_TEST 1702 1703 /* Test failures in UTF-16 CDATA */ 1704 START_TEST(test_bad_cdata_utf16) { 1705 struct CaseData { 1706 size_t text_bytes; 1707 const char *text; 1708 enum XML_Error expected_error; 1709 }; 1710 1711 const char prolog[] = "\0<\0?\0x\0m\0l\0" 1712 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1713 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1714 "1\0" 1715 "6\0'" 1716 "\0?\0>\0\n" 1717 "\0<\0a\0>"; 1718 struct CaseData cases[] = { 1719 {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, 1720 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, 1721 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, 1722 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, 1723 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, 1724 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, 1725 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, 1726 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, 1727 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN}, 1728 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, 1729 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, 1730 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, 1731 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1732 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, 1733 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, 1734 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, 1735 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1736 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1737 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1738 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1739 /* Now add a four-byte UTF-16 character */ 1740 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", 1741 XML_ERROR_UNCLOSED_CDATA_SECTION}, 1742 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR}, 1743 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", 1744 XML_ERROR_PARTIAL_CHAR}, 1745 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", 1746 XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1747 size_t i; 1748 1749 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1750 set_subtest("case %lu", (long unsigned)(i + 1)); 1751 enum XML_Status actual_status; 1752 enum XML_Error actual_error; 1753 1754 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1, 1755 XML_FALSE) 1756 == XML_STATUS_ERROR) 1757 xml_failure(g_parser); 1758 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text, 1759 (int)cases[i].text_bytes, XML_TRUE); 1760 assert(actual_status == XML_STATUS_ERROR); 1761 actual_error = XML_GetErrorCode(g_parser); 1762 if (actual_error != cases[i].expected_error) { 1763 char message[1024]; 1764 1765 snprintf(message, sizeof(message), 1766 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR 1767 ") for case %lu\n", 1768 cases[i].expected_error, 1769 XML_ErrorString(cases[i].expected_error), actual_error, 1770 XML_ErrorString(actual_error), (long unsigned)(i + 1)); 1771 fail(message); 1772 } 1773 XML_ParserReset(g_parser, NULL); 1774 } 1775 } 1776 END_TEST 1777 1778 /* Test stopping the parser in cdata handler */ 1779 START_TEST(test_stop_parser_between_cdata_calls) { 1780 const char *text = long_cdata_text; 1781 1782 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1783 g_resumable = XML_FALSE; 1784 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler"); 1785 } 1786 END_TEST 1787 1788 /* Test suspending the parser in cdata handler */ 1789 START_TEST(test_suspend_parser_between_cdata_calls) { 1790 const char *text = long_cdata_text; 1791 enum XML_Status result; 1792 1793 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1794 g_resumable = XML_TRUE; 1795 result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 1796 if (result != XML_STATUS_SUSPENDED) { 1797 if (result == XML_STATUS_ERROR) 1798 xml_failure(g_parser); 1799 fail("Parse not suspended in CDATA handler"); 1800 } 1801 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1802 xml_failure(g_parser); 1803 } 1804 END_TEST 1805 1806 /* Test memory allocation functions */ 1807 START_TEST(test_memory_allocation) { 1808 char *buffer = (char *)XML_MemMalloc(g_parser, 256); 1809 char *p; 1810 1811 if (buffer == NULL) { 1812 fail("Allocation failed"); 1813 } else { 1814 /* Try writing to memory; some OSes try to cheat! */ 1815 buffer[0] = 'T'; 1816 buffer[1] = 'E'; 1817 buffer[2] = 'S'; 1818 buffer[3] = 'T'; 1819 buffer[4] = '\0'; 1820 if (strcmp(buffer, "TEST") != 0) { 1821 fail("Memory not writable"); 1822 } else { 1823 p = (char *)XML_MemRealloc(g_parser, buffer, 512); 1824 if (p == NULL) { 1825 fail("Reallocation failed"); 1826 } else { 1827 /* Write again, just to be sure */ 1828 buffer = p; 1829 buffer[0] = 'V'; 1830 if (strcmp(buffer, "VEST") != 0) { 1831 fail("Reallocated memory not writable"); 1832 } 1833 } 1834 } 1835 XML_MemFree(g_parser, buffer); 1836 } 1837 } 1838 END_TEST 1839 1840 /* Test XML_DefaultCurrent() passes handling on correctly */ 1841 START_TEST(test_default_current) { 1842 const char *text = "<doc>hell]</doc>"; 1843 const char *entity_text = "<!DOCTYPE doc [\n" 1844 "<!ENTITY entity '%'>\n" 1845 "]>\n" 1846 "<doc>&entity;</doc>"; 1847 1848 set_subtest("with defaulting"); 1849 { 1850 struct handler_record_list storage; 1851 storage.count = 0; 1852 XML_SetDefaultHandler(g_parser, record_default_handler); 1853 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1854 XML_SetUserData(g_parser, &storage); 1855 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1856 == XML_STATUS_ERROR) 1857 xml_failure(g_parser); 1858 int i = 0; 1859 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 1860 // we should have gotten one or more cdata callbacks, totaling 5 chars 1861 int cdata_len_remaining = 5; 1862 while (cdata_len_remaining > 0) { 1863 const struct handler_record_entry *c_entry 1864 = handler_record_get(&storage, i++); 1865 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0); 1866 assert_true(c_entry->arg > 0); 1867 assert_true(c_entry->arg <= cdata_len_remaining); 1868 cdata_len_remaining -= c_entry->arg; 1869 // default handler must follow, with the exact same len argument. 1870 assert_record_handler_called(&storage, i++, "record_default_handler", 1871 c_entry->arg); 1872 } 1873 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 1874 assert_true(storage.count == i); 1875 } 1876 1877 /* Again, without the defaulting */ 1878 set_subtest("no defaulting"); 1879 { 1880 struct handler_record_list storage; 1881 storage.count = 0; 1882 XML_ParserReset(g_parser, NULL); 1883 XML_SetDefaultHandler(g_parser, record_default_handler); 1884 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 1885 XML_SetUserData(g_parser, &storage); 1886 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1887 == XML_STATUS_ERROR) 1888 xml_failure(g_parser); 1889 int i = 0; 1890 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 1891 // we should have gotten one or more cdata callbacks, totaling 5 chars 1892 int cdata_len_remaining = 5; 1893 while (cdata_len_remaining > 0) { 1894 const struct handler_record_entry *c_entry 1895 = handler_record_get(&storage, i++); 1896 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); 1897 assert_true(c_entry->arg > 0); 1898 assert_true(c_entry->arg <= cdata_len_remaining); 1899 cdata_len_remaining -= c_entry->arg; 1900 } 1901 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 1902 assert_true(storage.count == i); 1903 } 1904 1905 /* Now with an internal entity to complicate matters */ 1906 set_subtest("with internal entity"); 1907 { 1908 struct handler_record_list storage; 1909 storage.count = 0; 1910 XML_ParserReset(g_parser, NULL); 1911 XML_SetDefaultHandler(g_parser, record_default_handler); 1912 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1913 XML_SetUserData(g_parser, &storage); 1914 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 1915 XML_TRUE) 1916 == XML_STATUS_ERROR) 1917 xml_failure(g_parser); 1918 /* The default handler suppresses the entity */ 1919 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 1920 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 1921 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 1922 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 1923 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 1924 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 1925 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 1926 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 1927 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 1928 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 1929 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 1930 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 1931 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 1932 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 1933 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 1934 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 1935 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 1936 assert_record_handler_called(&storage, 17, "record_default_handler", 8); 1937 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 1938 assert_true(storage.count == 19); 1939 } 1940 1941 /* Again, with a skip handler */ 1942 set_subtest("with skip handler"); 1943 { 1944 struct handler_record_list storage; 1945 storage.count = 0; 1946 XML_ParserReset(g_parser, NULL); 1947 XML_SetDefaultHandler(g_parser, record_default_handler); 1948 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1949 XML_SetSkippedEntityHandler(g_parser, record_skip_handler); 1950 XML_SetUserData(g_parser, &storage); 1951 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 1952 XML_TRUE) 1953 == XML_STATUS_ERROR) 1954 xml_failure(g_parser); 1955 /* The default handler suppresses the entity */ 1956 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 1957 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 1958 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 1959 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 1960 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 1961 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 1962 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 1963 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 1964 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 1965 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 1966 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 1967 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 1968 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 1969 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 1970 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 1971 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 1972 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 1973 assert_record_handler_called(&storage, 17, "record_skip_handler", 0); 1974 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 1975 assert_true(storage.count == 19); 1976 } 1977 1978 /* This time, allow the entity through */ 1979 set_subtest("allow entity"); 1980 { 1981 struct handler_record_list storage; 1982 storage.count = 0; 1983 XML_ParserReset(g_parser, NULL); 1984 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 1985 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1986 XML_SetUserData(g_parser, &storage); 1987 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 1988 XML_TRUE) 1989 == XML_STATUS_ERROR) 1990 xml_failure(g_parser); 1991 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 1992 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 1993 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 1994 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 1995 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 1996 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 1997 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 1998 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 1999 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2000 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2001 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2002 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2003 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2004 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2005 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2006 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2007 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2008 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); 2009 assert_record_handler_called(&storage, 18, "record_default_handler", 1); 2010 assert_record_handler_called(&storage, 19, "record_default_handler", 6); 2011 assert_true(storage.count == 20); 2012 } 2013 2014 /* Finally, without passing the cdata to the default handler */ 2015 set_subtest("not passing cdata"); 2016 { 2017 struct handler_record_list storage; 2018 storage.count = 0; 2019 XML_ParserReset(g_parser, NULL); 2020 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2021 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2022 XML_SetUserData(g_parser, &storage); 2023 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2024 XML_TRUE) 2025 == XML_STATUS_ERROR) 2026 xml_failure(g_parser); 2027 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2028 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2029 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2030 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2031 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2032 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2033 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2034 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2035 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2036 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2037 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2038 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2039 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2040 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2041 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2042 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2043 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2044 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", 2045 1); 2046 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2047 assert_true(storage.count == 19); 2048 } 2049 } 2050 END_TEST 2051 2052 /* Test DTD element parsing code paths */ 2053 START_TEST(test_dtd_elements) { 2054 const char *text = "<!DOCTYPE doc [\n" 2055 "<!ELEMENT doc (chapter)>\n" 2056 "<!ELEMENT chapter (#PCDATA)>\n" 2057 "]>\n" 2058 "<doc><chapter>Wombats are go</chapter></doc>"; 2059 2060 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 2061 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2062 == XML_STATUS_ERROR) 2063 xml_failure(g_parser); 2064 } 2065 END_TEST 2066 2067 static void XMLCALL 2068 element_decl_check_model(void *userData, const XML_Char *name, 2069 XML_Content *model) { 2070 UNUSED_P(userData); 2071 uint32_t errorFlags = 0; 2072 2073 /* Expected model array structure is this: 2074 * [0] (type 6, quant 0) 2075 * [1] (type 5, quant 0) 2076 * [3] (type 4, quant 0, name "bar") 2077 * [4] (type 4, quant 0, name "foo") 2078 * [5] (type 4, quant 3, name "xyz") 2079 * [2] (type 4, quant 2, name "zebra") 2080 */ 2081 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); 2082 errorFlags |= ((model != NULL) ? 0 : (1u << 1)); 2083 2084 if (model != NULL) { 2085 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); 2086 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); 2087 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); 2088 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); 2089 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); 2090 2091 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); 2092 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); 2093 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); 2094 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); 2095 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); 2096 2097 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); 2098 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); 2099 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14)); 2100 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); 2101 errorFlags 2102 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); 2103 2104 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); 2105 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); 2106 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); 2107 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); 2108 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); 2109 2110 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); 2111 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); 2112 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); 2113 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); 2114 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); 2115 2116 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); 2117 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); 2118 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); 2119 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); 2120 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31)); 2121 } 2122 2123 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); 2124 XML_FreeContentModel(g_parser, model); 2125 } 2126 2127 START_TEST(test_dtd_elements_nesting) { 2128 // Payload inspired by a test in Perl's XML::Parser 2129 const char *text = "<!DOCTYPE foo [\n" 2130 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n" 2131 "]>\n" 2132 "<foo/>"; 2133 2134 XML_SetUserData(g_parser, (void *)(uintptr_t)-1); 2135 2136 XML_SetElementDeclHandler(g_parser, element_decl_check_model); 2137 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2138 == XML_STATUS_ERROR) 2139 xml_failure(g_parser); 2140 2141 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) 2142 fail("Element declaration model regression detected"); 2143 } 2144 END_TEST 2145 2146 /* Test foreign DTD handling */ 2147 START_TEST(test_set_foreign_dtd) { 2148 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"; 2149 const char *text2 = "<doc>&entity;</doc>"; 2150 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2151 2152 /* Check hash salt is passed through too */ 2153 XML_SetHashSalt(g_parser, 0x12345678); 2154 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2155 XML_SetUserData(g_parser, &test_data); 2156 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2157 /* Add a default handler to exercise more code paths */ 2158 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2159 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2160 fail("Could not set foreign DTD"); 2161 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2162 == XML_STATUS_ERROR) 2163 xml_failure(g_parser); 2164 2165 /* Ensure that trying to set the DTD after parsing has started 2166 * is faulted, even if it's the same setting. 2167 */ 2168 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2169 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2170 fail("Failed to reject late foreign DTD setting"); 2171 /* Ditto for the hash salt */ 2172 if (XML_SetHashSalt(g_parser, 0x23456789)) 2173 fail("Failed to reject late hash salt change"); 2174 2175 /* Now finish the parse */ 2176 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2177 == XML_STATUS_ERROR) 2178 xml_failure(g_parser); 2179 } 2180 END_TEST 2181 2182 /* Test foreign DTD handling with a failing NotStandalone handler */ 2183 START_TEST(test_foreign_dtd_not_standalone) { 2184 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2185 "<doc>&entity;</doc>"; 2186 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2187 2188 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2189 XML_SetUserData(g_parser, &test_data); 2190 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2191 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 2192 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2193 fail("Could not set foreign DTD"); 2194 expect_failure(text, XML_ERROR_NOT_STANDALONE, 2195 "NotStandalonehandler failed to reject"); 2196 } 2197 END_TEST 2198 2199 /* Test invalid character in a foreign DTD is faulted */ 2200 START_TEST(test_invalid_foreign_dtd) { 2201 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2202 "<doc>&entity;</doc>"; 2203 ExtFaults test_data 2204 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN}; 2205 2206 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2207 XML_SetUserData(g_parser, &test_data); 2208 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 2209 XML_UseForeignDTD(g_parser, XML_TRUE); 2210 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2211 "Bad DTD should not have been accepted"); 2212 } 2213 END_TEST 2214 2215 /* Test foreign DTD use with a doctype */ 2216 START_TEST(test_foreign_dtd_with_doctype) { 2217 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n" 2218 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n"; 2219 const char *text2 = "<doc>&entity;</doc>"; 2220 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2221 2222 /* Check hash salt is passed through too */ 2223 XML_SetHashSalt(g_parser, 0x12345678); 2224 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2225 XML_SetUserData(g_parser, &test_data); 2226 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2227 /* Add a default handler to exercise more code paths */ 2228 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2229 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2230 fail("Could not set foreign DTD"); 2231 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2232 == XML_STATUS_ERROR) 2233 xml_failure(g_parser); 2234 2235 /* Ensure that trying to set the DTD after parsing has started 2236 * is faulted, even if it's the same setting. 2237 */ 2238 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2239 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2240 fail("Failed to reject late foreign DTD setting"); 2241 /* Ditto for the hash salt */ 2242 if (XML_SetHashSalt(g_parser, 0x23456789)) 2243 fail("Failed to reject late hash salt change"); 2244 2245 /* Now finish the parse */ 2246 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2247 == XML_STATUS_ERROR) 2248 xml_failure(g_parser); 2249 } 2250 END_TEST 2251 2252 /* Test XML_UseForeignDTD with no external subset present */ 2253 START_TEST(test_foreign_dtd_without_external_subset) { 2254 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n" 2255 "<doc>&foo;</doc>"; 2256 2257 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2258 XML_SetUserData(g_parser, NULL); 2259 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2260 XML_UseForeignDTD(g_parser, XML_TRUE); 2261 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2262 == XML_STATUS_ERROR) 2263 xml_failure(g_parser); 2264 } 2265 END_TEST 2266 2267 START_TEST(test_empty_foreign_dtd) { 2268 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2269 "<doc>&entity;</doc>"; 2270 2271 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2272 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2273 XML_UseForeignDTD(g_parser, XML_TRUE); 2274 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 2275 "Undefined entity not faulted"); 2276 } 2277 END_TEST 2278 2279 /* Test XML Base is set and unset appropriately */ 2280 START_TEST(test_set_base) { 2281 const XML_Char *old_base; 2282 const XML_Char *new_base = XCS("/local/file/name.xml"); 2283 2284 old_base = XML_GetBase(g_parser); 2285 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK) 2286 fail("Unable to set base"); 2287 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0) 2288 fail("Base setting not correct"); 2289 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK) 2290 fail("Unable to NULL base"); 2291 if (XML_GetBase(g_parser) != NULL) 2292 fail("Base setting not nulled"); 2293 XML_SetBase(g_parser, old_base); 2294 } 2295 END_TEST 2296 2297 /* Test attribute counts, indexing, etc */ 2298 START_TEST(test_attributes) { 2299 const char *text = "<!DOCTYPE doc [\n" 2300 "<!ELEMENT doc (tag)>\n" 2301 "<!ATTLIST doc id ID #REQUIRED>\n" 2302 "]>" 2303 "<doc a='1' id='one' b='2'>" 2304 "<tag c='3'/>" 2305 "</doc>"; 2306 AttrInfo doc_info[] = {{XCS("a"), XCS("1")}, 2307 {XCS("b"), XCS("2")}, 2308 {XCS("id"), XCS("one")}, 2309 {NULL, NULL}}; 2310 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}}; 2311 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL}, 2312 {XCS("tag"), 1, NULL, NULL}, 2313 {NULL, 0, NULL, NULL}}; 2314 info[0].attributes = doc_info; 2315 info[1].attributes = tag_info; 2316 2317 XML_SetStartElementHandler(g_parser, counting_start_element_handler); 2318 XML_SetUserData(g_parser, info); 2319 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2320 == XML_STATUS_ERROR) 2321 xml_failure(g_parser); 2322 } 2323 END_TEST 2324 2325 /* Test reset works correctly in the middle of processing an internal 2326 * entity. Exercises some obscure code in XML_ParserReset(). 2327 */ 2328 START_TEST(test_reset_in_entity) { 2329 const char *text = "<!DOCTYPE doc [\n" 2330 "<!ENTITY wombat 'wom'>\n" 2331 "<!ENTITY entity 'hi &wom; there'>\n" 2332 "]>\n" 2333 "<doc>&entity;</doc>"; 2334 XML_ParsingStatus status; 2335 2336 g_resumable = XML_TRUE; 2337 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2338 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2339 == XML_STATUS_ERROR) 2340 xml_failure(g_parser); 2341 XML_GetParsingStatus(g_parser, &status); 2342 if (status.parsing != XML_SUSPENDED) 2343 fail("Parsing status not SUSPENDED"); 2344 XML_ParserReset(g_parser, NULL); 2345 XML_GetParsingStatus(g_parser, &status); 2346 if (status.parsing != XML_INITIALIZED) 2347 fail("Parsing status doesn't reset to INITIALIZED"); 2348 } 2349 END_TEST 2350 2351 /* Test that resume correctly passes through parse errors */ 2352 START_TEST(test_resume_invalid_parse) { 2353 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */ 2354 2355 g_resumable = XML_TRUE; 2356 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2357 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2358 == XML_STATUS_ERROR) 2359 xml_failure(g_parser); 2360 if (XML_ResumeParser(g_parser) == XML_STATUS_OK) 2361 fail("Resumed invalid parse not faulted"); 2362 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN) 2363 fail("Invalid parse not correctly faulted"); 2364 } 2365 END_TEST 2366 2367 /* Test that re-suspended parses are correctly passed through */ 2368 START_TEST(test_resume_resuspended) { 2369 const char *text = "<doc>Hello<meep/>world</doc>"; 2370 2371 g_resumable = XML_TRUE; 2372 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2373 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2374 == XML_STATUS_ERROR) 2375 xml_failure(g_parser); 2376 g_resumable = XML_TRUE; 2377 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2378 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 2379 fail("Resumption not suspended"); 2380 /* This one should succeed and finish up */ 2381 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 2382 xml_failure(g_parser); 2383 } 2384 END_TEST 2385 2386 /* Test that CDATA shows up correctly through a default handler */ 2387 START_TEST(test_cdata_default) { 2388 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>"; 2389 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>"); 2390 CharData storage; 2391 2392 CharData_Init(&storage); 2393 XML_SetUserData(g_parser, &storage); 2394 XML_SetDefaultHandler(g_parser, accumulate_characters); 2395 2396 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2397 == XML_STATUS_ERROR) 2398 xml_failure(g_parser); 2399 CharData_CheckXMLChars(&storage, expected); 2400 } 2401 END_TEST 2402 2403 /* Test resetting a subordinate parser does exactly nothing */ 2404 START_TEST(test_subordinate_reset) { 2405 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2406 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2407 "<doc>&entity;</doc>"; 2408 2409 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2410 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter); 2411 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2412 == XML_STATUS_ERROR) 2413 xml_failure(g_parser); 2414 } 2415 END_TEST 2416 2417 /* Test suspending a subordinate parser */ 2418 START_TEST(test_subordinate_suspend) { 2419 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2420 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2421 "<doc>&entity;</doc>"; 2422 2423 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2424 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender); 2425 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2426 == XML_STATUS_ERROR) 2427 xml_failure(g_parser); 2428 } 2429 END_TEST 2430 2431 /* Test suspending a subordinate parser from an XML declaration */ 2432 /* Increases code coverage of the tests */ 2433 2434 START_TEST(test_subordinate_xdecl_suspend) { 2435 const char *text 2436 = "<!DOCTYPE doc [\n" 2437 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2438 "]>\n" 2439 "<doc>&entity;</doc>"; 2440 2441 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2442 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2443 g_resumable = XML_TRUE; 2444 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2445 == XML_STATUS_ERROR) 2446 xml_failure(g_parser); 2447 } 2448 END_TEST 2449 2450 START_TEST(test_subordinate_xdecl_abort) { 2451 const char *text 2452 = "<!DOCTYPE doc [\n" 2453 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2454 "]>\n" 2455 "<doc>&entity;</doc>"; 2456 2457 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2458 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2459 g_resumable = XML_FALSE; 2460 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2461 == XML_STATUS_ERROR) 2462 xml_failure(g_parser); 2463 } 2464 END_TEST 2465 2466 /* Test external entity fault handling with suspension */ 2467 START_TEST(test_ext_entity_invalid_suspended_parse) { 2468 const char *text = "<!DOCTYPE doc [\n" 2469 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2470 "]>\n" 2471 "<doc>&en;</doc>"; 2472 ExtFaults faults[] 2473 = {{"<?xml version='1.0' encoding='us-ascii'?><", 2474 "Incomplete element declaration not faulted", NULL, 2475 XML_ERROR_UNCLOSED_TOKEN}, 2476 {/* First two bytes of a three-byte char */ 2477 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82", 2478 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 2479 {NULL, NULL, NULL, XML_ERROR_NONE}}; 2480 ExtFaults *fault; 2481 2482 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 2483 set_subtest("%s", fault->parse_text); 2484 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2485 XML_SetExternalEntityRefHandler(g_parser, 2486 external_entity_suspending_faulter); 2487 XML_SetUserData(g_parser, fault); 2488 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2489 "Parser did not report external entity error"); 2490 XML_ParserReset(g_parser, NULL); 2491 } 2492 } 2493 END_TEST 2494 2495 /* Test setting an explicit encoding */ 2496 START_TEST(test_explicit_encoding) { 2497 const char *text1 = "<doc>Hello "; 2498 const char *text2 = " World</doc>"; 2499 2500 /* Just check that we can set the encoding to NULL before starting */ 2501 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2502 fail("Failed to initialise encoding to NULL"); 2503 /* Say we are UTF-8 */ 2504 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK) 2505 fail("Failed to set explicit encoding"); 2506 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2507 == XML_STATUS_ERROR) 2508 xml_failure(g_parser); 2509 /* Try to switch encodings mid-parse */ 2510 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR) 2511 fail("Allowed encoding change"); 2512 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2513 == XML_STATUS_ERROR) 2514 xml_failure(g_parser); 2515 /* Try now the parse is over */ 2516 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2517 fail("Failed to unset encoding"); 2518 } 2519 END_TEST 2520 2521 /* Test handling of trailing CR (rather than newline) */ 2522 START_TEST(test_trailing_cr) { 2523 const char *text = "<doc>\r"; 2524 int found_cr; 2525 2526 /* Try with a character handler, for code coverage */ 2527 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler); 2528 XML_SetUserData(g_parser, &found_cr); 2529 found_cr = 0; 2530 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2531 == XML_STATUS_OK) 2532 fail("Failed to fault unclosed doc"); 2533 if (found_cr == 0) 2534 fail("Did not catch the carriage return"); 2535 XML_ParserReset(g_parser, NULL); 2536 2537 /* Now with a default handler instead */ 2538 XML_SetDefaultHandler(g_parser, cr_cdata_handler); 2539 XML_SetUserData(g_parser, &found_cr); 2540 found_cr = 0; 2541 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2542 == XML_STATUS_OK) 2543 fail("Failed to fault unclosed doc"); 2544 if (found_cr == 0) 2545 fail("Did not catch default carriage return"); 2546 } 2547 END_TEST 2548 2549 /* Test trailing CR in an external entity parse */ 2550 START_TEST(test_ext_entity_trailing_cr) { 2551 const char *text = "<!DOCTYPE doc [\n" 2552 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2553 "]>\n" 2554 "<doc>&en;</doc>"; 2555 int found_cr; 2556 2557 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2558 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher); 2559 XML_SetUserData(g_parser, &found_cr); 2560 found_cr = 0; 2561 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2562 != XML_STATUS_OK) 2563 xml_failure(g_parser); 2564 if (found_cr == 0) 2565 fail("No carriage return found"); 2566 XML_ParserReset(g_parser, NULL); 2567 2568 /* Try again with a different trailing CR */ 2569 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2570 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher); 2571 XML_SetUserData(g_parser, &found_cr); 2572 found_cr = 0; 2573 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2574 != XML_STATUS_OK) 2575 xml_failure(g_parser); 2576 if (found_cr == 0) 2577 fail("No carriage return found"); 2578 } 2579 END_TEST 2580 2581 /* Test handling of trailing square bracket */ 2582 START_TEST(test_trailing_rsqb) { 2583 const char *text8 = "<doc>]"; 2584 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; 2585 int found_rsqb; 2586 int text8_len = (int)strlen(text8); 2587 2588 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2589 XML_SetUserData(g_parser, &found_rsqb); 2590 found_rsqb = 0; 2591 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE) 2592 == XML_STATUS_OK) 2593 fail("Failed to fault unclosed doc"); 2594 if (found_rsqb == 0) 2595 fail("Did not catch the right square bracket"); 2596 2597 /* Try again with a different encoding */ 2598 XML_ParserReset(g_parser, NULL); 2599 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2600 XML_SetUserData(g_parser, &found_rsqb); 2601 found_rsqb = 0; 2602 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2603 XML_TRUE) 2604 == XML_STATUS_OK) 2605 fail("Failed to fault unclosed doc"); 2606 if (found_rsqb == 0) 2607 fail("Did not catch the right square bracket"); 2608 2609 /* And finally with a default handler */ 2610 XML_ParserReset(g_parser, NULL); 2611 XML_SetDefaultHandler(g_parser, rsqb_handler); 2612 XML_SetUserData(g_parser, &found_rsqb); 2613 found_rsqb = 0; 2614 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2615 XML_TRUE) 2616 == XML_STATUS_OK) 2617 fail("Failed to fault unclosed doc"); 2618 if (found_rsqb == 0) 2619 fail("Did not catch the right square bracket"); 2620 } 2621 END_TEST 2622 2623 /* Test trailing right square bracket in an external entity parse */ 2624 START_TEST(test_ext_entity_trailing_rsqb) { 2625 const char *text = "<!DOCTYPE doc [\n" 2626 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2627 "]>\n" 2628 "<doc>&en;</doc>"; 2629 int found_rsqb; 2630 2631 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2632 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher); 2633 XML_SetUserData(g_parser, &found_rsqb); 2634 found_rsqb = 0; 2635 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2636 != XML_STATUS_OK) 2637 xml_failure(g_parser); 2638 if (found_rsqb == 0) 2639 fail("No right square bracket found"); 2640 } 2641 END_TEST 2642 2643 /* Test CDATA handling in an external entity */ 2644 START_TEST(test_ext_entity_good_cdata) { 2645 const char *text = "<!DOCTYPE doc [\n" 2646 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2647 "]>\n" 2648 "<doc>&en;</doc>"; 2649 2650 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2651 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii); 2652 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2653 != XML_STATUS_OK) 2654 xml_failure(g_parser); 2655 } 2656 END_TEST 2657 2658 /* Test user parameter settings */ 2659 START_TEST(test_user_parameters) { 2660 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2661 "<!-- Primary parse -->\n" 2662 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2663 "<doc>&entity;"; 2664 const char *epilog = "<!-- Back to primary parser -->\n" 2665 "</doc>"; 2666 2667 g_comment_count = 0; 2668 g_skip_count = 0; 2669 g_xdecl_count = 0; 2670 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2671 XML_SetXmlDeclHandler(g_parser, xml_decl_handler); 2672 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker); 2673 XML_SetCommentHandler(g_parser, data_check_comment_handler); 2674 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler); 2675 XML_UseParserAsHandlerArg(g_parser); 2676 XML_SetUserData(g_parser, (void *)1); 2677 g_handler_data = g_parser; 2678 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2679 == XML_STATUS_ERROR) 2680 xml_failure(g_parser); 2681 /* Ensure we can't change policy mid-parse */ 2682 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) 2683 fail("Changed param entity parsing policy while parsing"); 2684 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) 2685 == XML_STATUS_ERROR) 2686 xml_failure(g_parser); 2687 if (g_comment_count != 3) 2688 fail("Comment handler not invoked enough times"); 2689 if (g_skip_count != 1) 2690 fail("Skip handler not invoked enough times"); 2691 if (g_xdecl_count != 1) 2692 fail("XML declaration handler not invoked"); 2693 } 2694 END_TEST 2695 2696 /* Test that an explicit external entity handler argument replaces 2697 * the parser as the first argument. 2698 * 2699 * We do not call the first parameter to the external entity handler 2700 * 'parser' for once, since the first time the handler is called it 2701 * will actually be a text string. We need to be able to access the 2702 * global 'parser' variable to create our external entity parser from, 2703 * since there are code paths we need to ensure get executed. 2704 */ 2705 START_TEST(test_ext_entity_ref_parameter) { 2706 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2707 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2708 "<doc>&entity;</doc>"; 2709 2710 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2711 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2712 /* Set a handler arg that is not NULL and not parser (which is 2713 * what NULL would cause to be passed. 2714 */ 2715 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text); 2716 g_handler_data = text; 2717 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2718 == XML_STATUS_ERROR) 2719 xml_failure(g_parser); 2720 2721 /* Now try again with unset args */ 2722 XML_ParserReset(g_parser, NULL); 2723 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2724 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2725 XML_SetExternalEntityRefHandlerArg(g_parser, NULL); 2726 g_handler_data = g_parser; 2727 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2728 == XML_STATUS_ERROR) 2729 xml_failure(g_parser); 2730 } 2731 END_TEST 2732 2733 /* Test the parsing of an empty string */ 2734 START_TEST(test_empty_parse) { 2735 const char *text = "<doc></doc>"; 2736 const char *partial = "<doc>"; 2737 2738 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) 2739 fail("Parsing empty string faulted"); 2740 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2741 fail("Parsing final empty string not faulted"); 2742 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS) 2743 fail("Parsing final empty string faulted for wrong reason"); 2744 2745 /* Now try with valid text before the empty end */ 2746 XML_ParserReset(g_parser, NULL); 2747 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2748 == XML_STATUS_ERROR) 2749 xml_failure(g_parser); 2750 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) 2751 fail("Parsing final empty string faulted"); 2752 2753 /* Now try with invalid text before the empty end */ 2754 XML_ParserReset(g_parser, NULL); 2755 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial), 2756 XML_FALSE) 2757 == XML_STATUS_ERROR) 2758 xml_failure(g_parser); 2759 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2760 fail("Parsing final incomplete empty string not faulted"); 2761 } 2762 END_TEST 2763 2764 /* Test odd corners of the XML_GetBuffer interface */ 2765 static enum XML_Status 2766 get_feature(enum XML_FeatureEnum feature_id, long *presult) { 2767 const XML_Feature *feature = XML_GetFeatureList(); 2768 2769 if (feature == NULL) 2770 return XML_STATUS_ERROR; 2771 for (; feature->feature != XML_FEATURE_END; feature++) { 2772 if (feature->feature == feature_id) { 2773 *presult = feature->value; 2774 return XML_STATUS_OK; 2775 } 2776 } 2777 return XML_STATUS_ERROR; 2778 } 2779 2780 /* Test odd corners of the XML_GetBuffer interface */ 2781 START_TEST(test_get_buffer_1) { 2782 const char *text = get_buffer_test_text; 2783 void *buffer; 2784 long context_bytes; 2785 2786 /* Attempt to allocate a negative length buffer */ 2787 if (XML_GetBuffer(g_parser, -12) != NULL) 2788 fail("Negative length buffer not failed"); 2789 2790 /* Now get a small buffer and extend it past valid length */ 2791 buffer = XML_GetBuffer(g_parser, 1536); 2792 if (buffer == NULL) 2793 fail("1.5K buffer failed"); 2794 assert(buffer != NULL); 2795 memcpy(buffer, text, strlen(text)); 2796 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 2797 == XML_STATUS_ERROR) 2798 xml_failure(g_parser); 2799 if (XML_GetBuffer(g_parser, INT_MAX) != NULL) 2800 fail("INT_MAX buffer not failed"); 2801 2802 /* Now try extending it a more reasonable but still too large 2803 * amount. The allocator in XML_GetBuffer() doubles the buffer 2804 * size until it exceeds the requested amount or INT_MAX. If it 2805 * exceeds INT_MAX, it rejects the request, so we want a request 2806 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable, 2807 * with an extra byte just to ensure that the request is off any 2808 * boundary. The request will be inflated internally by 2809 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our 2810 * request. 2811 */ 2812 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK) 2813 context_bytes = 0; 2814 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL) 2815 fail("INT_MAX- buffer not failed"); 2816 2817 /* Now try extending it a carefully crafted amount */ 2818 if (XML_GetBuffer(g_parser, 1000) == NULL) 2819 fail("1000 buffer failed"); 2820 } 2821 END_TEST 2822 2823 /* Test more corners of the XML_GetBuffer interface */ 2824 START_TEST(test_get_buffer_2) { 2825 const char *text = get_buffer_test_text; 2826 void *buffer; 2827 2828 /* Now get a decent buffer */ 2829 buffer = XML_GetBuffer(g_parser, 1536); 2830 if (buffer == NULL) 2831 fail("1.5K buffer failed"); 2832 assert(buffer != NULL); 2833 memcpy(buffer, text, strlen(text)); 2834 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 2835 == XML_STATUS_ERROR) 2836 xml_failure(g_parser); 2837 2838 /* Extend it, to catch a different code path */ 2839 if (XML_GetBuffer(g_parser, 1024) == NULL) 2840 fail("1024 buffer failed"); 2841 } 2842 END_TEST 2843 2844 /* Test for signed integer overflow CVE-2022-23852 */ 2845 #if XML_CONTEXT_BYTES > 0 2846 START_TEST(test_get_buffer_3_overflow) { 2847 XML_Parser parser = XML_ParserCreate(NULL); 2848 assert(parser != NULL); 2849 2850 const char *const text = "\n"; 2851 const int expectedKeepValue = (int)strlen(text); 2852 2853 // After this call, variable "keep" in XML_GetBuffer will 2854 // have value expectedKeepValue 2855 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 2856 XML_FALSE /* isFinal */) 2857 == XML_STATUS_ERROR) 2858 xml_failure(parser); 2859 2860 assert(expectedKeepValue > 0); 2861 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) 2862 fail("enlarging buffer not failed"); 2863 2864 XML_ParserFree(parser); 2865 } 2866 END_TEST 2867 #endif // XML_CONTEXT_BYTES > 0 2868 2869 START_TEST(test_buffer_can_grow_to_max) { 2870 const char *const prefixes[] = { 2871 "", 2872 "<", 2873 "<x a='", 2874 "<doc><x a='", 2875 "<document><x a='", 2876 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" 2877 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" 2878 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" 2879 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" 2880 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; 2881 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); 2882 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow 2883 #if defined(__MINGW32__) && ! defined(__MINGW64__) 2884 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB 2885 // Can we make a big allocation? 2886 void *big = malloc(maxbuf); 2887 if (! big) { 2888 // The big allocation failed. Let's be a little lenient. 2889 maxbuf = maxbuf / 2; 2890 } 2891 free(big); 2892 #endif 2893 2894 for (int i = 0; i < num_prefixes; ++i) { 2895 set_subtest("\"%s\"", prefixes[i]); 2896 XML_Parser parser = XML_ParserCreate(NULL); 2897 const int prefix_len = (int)strlen(prefixes[i]); 2898 const enum XML_Status s 2899 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); 2900 if (s != XML_STATUS_OK) 2901 xml_failure(parser); 2902 2903 // XML_CONTEXT_BYTES of the prefix may remain in the buffer; 2904 // subtracting the whole prefix is easiest, and close enough. 2905 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); 2906 // The limit should be consistent; no prefix should allow us to 2907 // reach above the max buffer size. 2908 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); 2909 XML_ParserFree(parser); 2910 } 2911 } 2912 END_TEST 2913 2914 START_TEST(test_getbuffer_allocates_on_zero_len) { 2915 for (int first_len = 1; first_len >= 0; first_len--) { 2916 set_subtest("with len=%d first", first_len); 2917 XML_Parser parser = XML_ParserCreate(NULL); 2918 assert_true(parser != NULL); 2919 assert_true(XML_GetBuffer(parser, first_len) != NULL); 2920 assert_true(XML_GetBuffer(parser, 0) != NULL); 2921 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) 2922 xml_failure(parser); 2923 XML_ParserFree(parser); 2924 } 2925 } 2926 END_TEST 2927 2928 /* Test position information macros */ 2929 START_TEST(test_byte_info_at_end) { 2930 const char *text = "<doc></doc>"; 2931 2932 if (XML_GetCurrentByteIndex(g_parser) != -1 2933 || XML_GetCurrentByteCount(g_parser) != 0) 2934 fail("Byte index/count incorrect at start of parse"); 2935 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2936 == XML_STATUS_ERROR) 2937 xml_failure(g_parser); 2938 /* At end, the count will be zero and the index the end of string */ 2939 if (XML_GetCurrentByteCount(g_parser) != 0) 2940 fail("Terminal byte count incorrect"); 2941 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text)) 2942 fail("Terminal byte index incorrect"); 2943 } 2944 END_TEST 2945 2946 /* Test position information from errors */ 2947 #define PRE_ERROR_STR "<doc></" 2948 #define POST_ERROR_STR "wombat></doc>" 2949 START_TEST(test_byte_info_at_error) { 2950 const char *text = PRE_ERROR_STR POST_ERROR_STR; 2951 2952 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2953 == XML_STATUS_OK) 2954 fail("Syntax error not faulted"); 2955 if (XML_GetCurrentByteCount(g_parser) != 0) 2956 fail("Error byte count incorrect"); 2957 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR)) 2958 fail("Error byte index incorrect"); 2959 } 2960 END_TEST 2961 #undef PRE_ERROR_STR 2962 #undef POST_ERROR_STR 2963 2964 /* Test position information in handler */ 2965 #define START_ELEMENT "<e>" 2966 #define CDATA_TEXT "Hello" 2967 #define END_ELEMENT "</e>" 2968 START_TEST(test_byte_info_at_cdata) { 2969 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; 2970 int offset, size; 2971 ByteTestData data; 2972 2973 /* Check initial context is empty */ 2974 if (XML_GetInputContext(g_parser, &offset, &size) != NULL) 2975 fail("Unexpected context at start of parse"); 2976 2977 data.start_element_len = (int)strlen(START_ELEMENT); 2978 data.cdata_len = (int)strlen(CDATA_TEXT); 2979 data.total_string_len = (int)strlen(text); 2980 XML_SetCharacterDataHandler(g_parser, byte_character_handler); 2981 XML_SetUserData(g_parser, &data); 2982 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 2983 xml_failure(g_parser); 2984 } 2985 END_TEST 2986 #undef START_ELEMENT 2987 #undef CDATA_TEXT 2988 #undef END_ELEMENT 2989 2990 /* Test predefined entities are correctly recognised */ 2991 START_TEST(test_predefined_entities) { 2992 const char *text = "<doc><>&"'</doc>"; 2993 const XML_Char *expected = XCS("<doc><>&"'</doc>"); 2994 const XML_Char *result = XCS("<>&\"'"); 2995 CharData storage; 2996 2997 XML_SetDefaultHandler(g_parser, accumulate_characters); 2998 /* run_character_check uses XML_SetCharacterDataHandler(), which 2999 * unfortunately heads off a code path that we need to exercise. 3000 */ 3001 CharData_Init(&storage); 3002 XML_SetUserData(g_parser, &storage); 3003 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3004 == XML_STATUS_ERROR) 3005 xml_failure(g_parser); 3006 /* The default handler doesn't translate the entities */ 3007 CharData_CheckXMLChars(&storage, expected); 3008 3009 /* Now try again and check the translation */ 3010 XML_ParserReset(g_parser, NULL); 3011 run_character_check(text, result); 3012 } 3013 END_TEST 3014 3015 /* Regression test that an invalid tag in an external parameter 3016 * reference in an external DTD is correctly faulted. 3017 * 3018 * Only a few specific tags are legal in DTDs ignoring comments and 3019 * processing instructions, all of which begin with an exclamation 3020 * mark. "<el/>" is not one of them, so the parser should raise an 3021 * error on encountering it. 3022 */ 3023 START_TEST(test_invalid_tag_in_dtd) { 3024 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3025 "<doc></doc>\n"; 3026 3027 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3028 XML_SetExternalEntityRefHandler(g_parser, external_entity_param); 3029 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3030 "Invalid tag IN DTD external param not rejected"); 3031 } 3032 END_TEST 3033 3034 /* Test entities not quite the predefined ones are not mis-recognised */ 3035 START_TEST(test_not_predefined_entities) { 3036 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>", 3037 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL}; 3038 int i = 0; 3039 3040 while (text[i] != NULL) { 3041 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, 3042 "Undefined entity not rejected"); 3043 XML_ParserReset(g_parser, NULL); 3044 i++; 3045 } 3046 } 3047 END_TEST 3048 3049 /* Test conditional inclusion (IGNORE) */ 3050 START_TEST(test_ignore_section) { 3051 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3052 "<doc><e>&entity;</e></doc>"; 3053 const XML_Char *expected 3054 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;"); 3055 CharData storage; 3056 3057 CharData_Init(&storage); 3058 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3059 XML_SetUserData(g_parser, &storage); 3060 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore); 3061 XML_SetDefaultHandler(g_parser, accumulate_characters); 3062 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3063 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3064 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3065 XML_SetStartElementHandler(g_parser, dummy_start_element); 3066 XML_SetEndElementHandler(g_parser, dummy_end_element); 3067 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3068 == XML_STATUS_ERROR) 3069 xml_failure(g_parser); 3070 CharData_CheckXMLChars(&storage, expected); 3071 } 3072 END_TEST 3073 3074 START_TEST(test_ignore_section_utf16) { 3075 const char text[] = 3076 /* <!DOCTYPE d SYSTEM 's'> */ 3077 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3078 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" 3079 /* <d><e>&en;</e></d> */ 3080 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; 3081 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3082 CharData storage; 3083 3084 CharData_Init(&storage); 3085 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3086 XML_SetUserData(g_parser, &storage); 3087 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16); 3088 XML_SetDefaultHandler(g_parser, accumulate_characters); 3089 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3090 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3091 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3092 XML_SetStartElementHandler(g_parser, dummy_start_element); 3093 XML_SetEndElementHandler(g_parser, dummy_end_element); 3094 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3095 == XML_STATUS_ERROR) 3096 xml_failure(g_parser); 3097 CharData_CheckXMLChars(&storage, expected); 3098 } 3099 END_TEST 3100 3101 START_TEST(test_ignore_section_utf16_be) { 3102 const char text[] = 3103 /* <!DOCTYPE d SYSTEM 's'> */ 3104 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3105 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" 3106 /* <d><e>&en;</e></d> */ 3107 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; 3108 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3109 CharData storage; 3110 3111 CharData_Init(&storage); 3112 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3113 XML_SetUserData(g_parser, &storage); 3114 XML_SetExternalEntityRefHandler(g_parser, 3115 external_entity_load_ignore_utf16_be); 3116 XML_SetDefaultHandler(g_parser, accumulate_characters); 3117 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3118 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3119 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3120 XML_SetStartElementHandler(g_parser, dummy_start_element); 3121 XML_SetEndElementHandler(g_parser, dummy_end_element); 3122 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3123 == XML_STATUS_ERROR) 3124 xml_failure(g_parser); 3125 CharData_CheckXMLChars(&storage, expected); 3126 } 3127 END_TEST 3128 3129 /* Test mis-formatted conditional exclusion */ 3130 START_TEST(test_bad_ignore_section) { 3131 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3132 "<doc><e>&entity;</e></doc>"; 3133 ExtFaults faults[] 3134 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL, 3135 XML_ERROR_SYNTAX}, 3136 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL, 3137 XML_ERROR_INVALID_TOKEN}, 3138 {/* FIrst two bytes of a three-byte char */ 3139 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL, 3140 XML_ERROR_PARTIAL_CHAR}, 3141 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3142 ExtFaults *fault; 3143 3144 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 3145 set_subtest("%s", fault->parse_text); 3146 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3147 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3148 XML_SetUserData(g_parser, fault); 3149 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3150 "Incomplete IGNORE section not failed"); 3151 XML_ParserReset(g_parser, NULL); 3152 } 3153 } 3154 END_TEST 3155 3156 struct bom_testdata { 3157 const char *external; 3158 int split; 3159 XML_Bool nested_callback_happened; 3160 }; 3161 3162 static int XMLCALL 3163 external_bom_checker(XML_Parser parser, const XML_Char *context, 3164 const XML_Char *base, const XML_Char *systemId, 3165 const XML_Char *publicId) { 3166 const char *text; 3167 UNUSED_P(base); 3168 UNUSED_P(systemId); 3169 UNUSED_P(publicId); 3170 3171 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); 3172 if (ext_parser == NULL) 3173 fail("Could not create external entity parser"); 3174 3175 if (! xcstrcmp(systemId, XCS("004-2.ent"))) { 3176 struct bom_testdata *const testdata 3177 = (struct bom_testdata *)XML_GetUserData(parser); 3178 const char *const external = testdata->external; 3179 const int split = testdata->split; 3180 testdata->nested_callback_happened = XML_TRUE; 3181 3182 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) 3183 != XML_STATUS_OK) { 3184 xml_failure(ext_parser); 3185 } 3186 text = external + split; // the parse below will continue where we left off. 3187 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) { 3188 text = "<!ELEMENT doc EMPTY>\n" 3189 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" 3190 "<!ENTITY % e2 '%e1;'>\n"; 3191 } else { 3192 fail("unknown systemId"); 3193 } 3194 3195 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) 3196 != XML_STATUS_OK) 3197 xml_failure(ext_parser); 3198 3199 XML_ParserFree(ext_parser); 3200 return XML_STATUS_OK; 3201 } 3202 3203 /* regression test: BOM should be consumed when followed by a partial token. */ 3204 START_TEST(test_external_bom_consumed) { 3205 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3206 "<doc></doc>\n"; 3207 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; 3208 const int len = (int)strlen(external); 3209 for (int split = 0; split <= len; ++split) { 3210 set_subtest("split at byte %d", split); 3211 3212 struct bom_testdata testdata; 3213 testdata.external = external; 3214 testdata.split = split; 3215 testdata.nested_callback_happened = XML_FALSE; 3216 3217 XML_Parser parser = XML_ParserCreate(NULL); 3218 if (parser == NULL) { 3219 fail("Couldn't create parser"); 3220 } 3221 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3222 XML_SetExternalEntityRefHandler(parser, external_bom_checker); 3223 XML_SetUserData(parser, &testdata); 3224 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 3225 == XML_STATUS_ERROR) 3226 xml_failure(parser); 3227 if (! testdata.nested_callback_happened) { 3228 fail("ref handler not called"); 3229 } 3230 XML_ParserFree(parser); 3231 } 3232 } 3233 END_TEST 3234 3235 /* Test recursive parsing */ 3236 START_TEST(test_external_entity_values) { 3237 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3238 "<doc></doc>\n"; 3239 ExtFaults data_004_2[] = { 3240 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE}, 3241 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL, 3242 XML_ERROR_INVALID_TOKEN}, 3243 {"'wombat", "Unterminated string not faulted", NULL, 3244 XML_ERROR_UNCLOSED_TOKEN}, 3245 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL, 3246 XML_ERROR_PARTIAL_CHAR}, 3247 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE}, 3248 {"<?xml?>", "Malformed XML declaration not faulted", NULL, 3249 XML_ERROR_XML_DECL}, 3250 {/* UTF-8 BOM */ 3251 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, 3252 XML_ERROR_NONE}, 3253 {"<?xml version='1.0' encoding='utf-8'?>\n$", 3254 "Invalid token after text declaration not faulted", NULL, 3255 XML_ERROR_INVALID_TOKEN}, 3256 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat", 3257 "Unterminated string after text decl not faulted", NULL, 3258 XML_ERROR_UNCLOSED_TOKEN}, 3259 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82", 3260 "Partial UTF-8 character after text decl not faulted", NULL, 3261 XML_ERROR_PARTIAL_CHAR}, 3262 {"%e1;", "Recursive parameter entity not faulted", NULL, 3263 XML_ERROR_RECURSIVE_ENTITY_REF}, 3264 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3265 int i; 3266 3267 for (i = 0; data_004_2[i].parse_text != NULL; i++) { 3268 set_subtest("%s", data_004_2[i].parse_text); 3269 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3270 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); 3271 XML_SetUserData(g_parser, &data_004_2[i]); 3272 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3273 == XML_STATUS_ERROR) 3274 xml_failure(g_parser); 3275 XML_ParserReset(g_parser, NULL); 3276 } 3277 } 3278 END_TEST 3279 3280 /* Test the recursive parse interacts with a not standalone handler */ 3281 START_TEST(test_ext_entity_not_standalone) { 3282 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3283 "<doc></doc>"; 3284 3285 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3286 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone); 3287 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3288 "Standalone rejection not caught"); 3289 } 3290 END_TEST 3291 3292 START_TEST(test_ext_entity_value_abort) { 3293 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3294 "<doc></doc>\n"; 3295 3296 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3297 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter); 3298 g_resumable = XML_FALSE; 3299 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3300 == XML_STATUS_ERROR) 3301 xml_failure(g_parser); 3302 } 3303 END_TEST 3304 3305 START_TEST(test_bad_public_doctype) { 3306 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 3307 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n" 3308 "<doc></doc>"; 3309 3310 /* Setting a handler provokes a particular code path */ 3311 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, 3312 dummy_end_doctype_handler); 3313 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); 3314 } 3315 END_TEST 3316 3317 /* Test based on ibm/valid/P32/ibm32v04.xml */ 3318 START_TEST(test_attribute_enum_value) { 3319 const char *text = "<?xml version='1.0' standalone='no'?>\n" 3320 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" 3321 "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; 3322 ExtTest dtd_data 3323 = {"<!ELEMENT animal (#PCDATA|a)*>\n" 3324 "<!ELEMENT a EMPTY>\n" 3325 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>", 3326 NULL, NULL}; 3327 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger"); 3328 3329 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3330 XML_SetUserData(g_parser, &dtd_data); 3331 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3332 /* An attribute list handler provokes a different code path */ 3333 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 3334 run_ext_character_check(text, &dtd_data, expected); 3335 } 3336 END_TEST 3337 3338 /* Slightly bizarrely, the library seems to silently ignore entity 3339 * definitions for predefined entities, even when they are wrong. The 3340 * language of the XML 1.0 spec is somewhat unhelpful as to what ought 3341 * to happen, so this is currently treated as acceptable. 3342 */ 3343 START_TEST(test_predefined_entity_redefinition) { 3344 const char *text = "<!DOCTYPE doc [\n" 3345 "<!ENTITY apos 'foo'>\n" 3346 "]>\n" 3347 "<doc>'</doc>"; 3348 run_character_check(text, XCS("'")); 3349 } 3350 END_TEST 3351 3352 /* Test that the parser stops processing the DTD after an unresolved 3353 * parameter entity is encountered. 3354 */ 3355 START_TEST(test_dtd_stop_processing) { 3356 const char *text = "<!DOCTYPE doc [\n" 3357 "%foo;\n" 3358 "<!ENTITY bar 'bas'>\n" 3359 "]><doc/>"; 3360 3361 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 3362 init_dummy_handlers(); 3363 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3364 == XML_STATUS_ERROR) 3365 xml_failure(g_parser); 3366 if (get_dummy_handler_flags() != 0) 3367 fail("DTD processing still going after undefined PE"); 3368 } 3369 END_TEST 3370 3371 /* Test public notations with no system ID */ 3372 START_TEST(test_public_notation_no_sysid) { 3373 const char *text = "<!DOCTYPE doc [\n" 3374 "<!NOTATION note PUBLIC 'foo'>\n" 3375 "<!ELEMENT doc EMPTY>\n" 3376 "]>\n<doc/>"; 3377 3378 init_dummy_handlers(); 3379 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 3380 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3381 == XML_STATUS_ERROR) 3382 xml_failure(g_parser); 3383 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG) 3384 fail("Notation declaration handler not called"); 3385 } 3386 END_TEST 3387 3388 START_TEST(test_nested_groups) { 3389 const char *text 3390 = "<!DOCTYPE doc [\n" 3391 "<!ELEMENT doc " 3392 /* Sixteen elements per line */ 3393 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," 3394 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" 3395 "))))))))))))))))))))))))))))))))>\n" 3396 "<!ELEMENT e EMPTY>" 3397 "]>\n" 3398 "<doc><e/></doc>"; 3399 CharData storage; 3400 3401 CharData_Init(&storage); 3402 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3403 XML_SetStartElementHandler(g_parser, record_element_start_handler); 3404 XML_SetUserData(g_parser, &storage); 3405 init_dummy_handlers(); 3406 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3407 == XML_STATUS_ERROR) 3408 xml_failure(g_parser); 3409 CharData_CheckXMLChars(&storage, XCS("doce")); 3410 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3411 fail("Element handler not fired"); 3412 } 3413 END_TEST 3414 3415 START_TEST(test_group_choice) { 3416 const char *text = "<!DOCTYPE doc [\n" 3417 "<!ELEMENT doc (a|b|c)+>\n" 3418 "<!ELEMENT a EMPTY>\n" 3419 "<!ELEMENT b (#PCDATA)>\n" 3420 "<!ELEMENT c ANY>\n" 3421 "]>\n" 3422 "<doc>\n" 3423 "<a/>\n" 3424 "<b attr='foo'>This is a foo</b>\n" 3425 "<c></c>\n" 3426 "</doc>\n"; 3427 3428 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3429 init_dummy_handlers(); 3430 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3431 == XML_STATUS_ERROR) 3432 xml_failure(g_parser); 3433 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3434 fail("Element handler flag not raised"); 3435 } 3436 END_TEST 3437 3438 START_TEST(test_standalone_parameter_entity) { 3439 const char *text = "<?xml version='1.0' standalone='yes'?>\n" 3440 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n" 3441 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n" 3442 "%entity;\n" 3443 "]>\n" 3444 "<doc></doc>"; 3445 char dtd_data[] = "<!ENTITY % e1 'foo'>\n"; 3446 3447 XML_SetUserData(g_parser, dtd_data); 3448 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3449 XML_SetExternalEntityRefHandler(g_parser, external_entity_public); 3450 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3451 == XML_STATUS_ERROR) 3452 xml_failure(g_parser); 3453 } 3454 END_TEST 3455 3456 /* Test skipping of parameter entity in an external DTD */ 3457 /* Derived from ibm/invalid/P69/ibm69i01.xml */ 3458 START_TEST(test_skipped_parameter_entity) { 3459 const char *text = "<?xml version='1.0'?>\n" 3460 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3461 "<!ELEMENT root (#PCDATA|a)* >\n" 3462 "]>\n" 3463 "<root></root>"; 3464 ExtTest dtd_data = {"%pe2;", NULL, NULL}; 3465 3466 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3467 XML_SetUserData(g_parser, &dtd_data); 3468 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3469 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler); 3470 init_dummy_handlers(); 3471 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3472 == XML_STATUS_ERROR) 3473 xml_failure(g_parser); 3474 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG) 3475 fail("Skip handler not executed"); 3476 } 3477 END_TEST 3478 3479 /* Test recursive parameter entity definition rejected in external DTD */ 3480 START_TEST(test_recursive_external_parameter_entity) { 3481 const char *text = "<?xml version='1.0'?>\n" 3482 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3483 "<!ELEMENT root (#PCDATA|a)* >\n" 3484 "]>\n" 3485 "<root></root>"; 3486 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;", 3487 "Recursive external parameter entity not faulted", NULL, 3488 XML_ERROR_RECURSIVE_ENTITY_REF}; 3489 3490 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3491 XML_SetUserData(g_parser, &dtd_data); 3492 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3493 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3494 "Recursive external parameter not spotted"); 3495 } 3496 END_TEST 3497 3498 /* Test undefined parameter entity in external entity handler */ 3499 START_TEST(test_undefined_ext_entity_in_external_dtd) { 3500 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3501 "<doc></doc>\n"; 3502 3503 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3504 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3505 XML_SetUserData(g_parser, NULL); 3506 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3507 == XML_STATUS_ERROR) 3508 xml_failure(g_parser); 3509 3510 /* Now repeat without the external entity ref handler invoking 3511 * another copy of itself. 3512 */ 3513 XML_ParserReset(g_parser, NULL); 3514 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3515 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3516 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */ 3517 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3518 == XML_STATUS_ERROR) 3519 xml_failure(g_parser); 3520 } 3521 END_TEST 3522 3523 /* Test suspending the parse on receiving an XML declaration works */ 3524 START_TEST(test_suspend_xdecl) { 3525 const char *text = long_character_data_text; 3526 3527 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler); 3528 XML_SetUserData(g_parser, g_parser); 3529 g_resumable = XML_TRUE; 3530 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3531 != XML_STATUS_SUSPENDED) 3532 xml_failure(g_parser); 3533 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 3534 xml_failure(g_parser); 3535 /* Attempt to start a new parse while suspended */ 3536 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3537 != XML_STATUS_ERROR) 3538 fail("Attempt to parse while suspended not faulted"); 3539 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 3540 fail("Suspended parse not faulted with correct error"); 3541 } 3542 END_TEST 3543 3544 /* Test aborting the parse in an epilog works */ 3545 START_TEST(test_abort_epilog) { 3546 const char *text = "<doc></doc>\n\r\n"; 3547 XML_Char trigger_char = XCS('\r'); 3548 3549 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3550 XML_SetUserData(g_parser, &trigger_char); 3551 g_resumable = XML_FALSE; 3552 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3553 != XML_STATUS_ERROR) 3554 fail("Abort not triggered"); 3555 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 3556 xml_failure(g_parser); 3557 } 3558 END_TEST 3559 3560 /* Test a different code path for abort in the epilog */ 3561 START_TEST(test_abort_epilog_2) { 3562 const char *text = "<doc></doc>\n"; 3563 XML_Char trigger_char = XCS('\n'); 3564 3565 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3566 XML_SetUserData(g_parser, &trigger_char); 3567 g_resumable = XML_FALSE; 3568 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); 3569 } 3570 END_TEST 3571 3572 /* Test suspension from the epilog */ 3573 START_TEST(test_suspend_epilog) { 3574 const char *text = "<doc></doc>\n"; 3575 XML_Char trigger_char = XCS('\n'); 3576 3577 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3578 XML_SetUserData(g_parser, &trigger_char); 3579 g_resumable = XML_TRUE; 3580 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3581 != XML_STATUS_SUSPENDED) 3582 xml_failure(g_parser); 3583 } 3584 END_TEST 3585 3586 START_TEST(test_suspend_in_sole_empty_tag) { 3587 const char *text = "<doc/>"; 3588 enum XML_Status rc; 3589 3590 XML_SetEndElementHandler(g_parser, suspending_end_handler); 3591 XML_SetUserData(g_parser, g_parser); 3592 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 3593 if (rc == XML_STATUS_ERROR) 3594 xml_failure(g_parser); 3595 else if (rc != XML_STATUS_SUSPENDED) 3596 fail("Suspend not triggered"); 3597 rc = XML_ResumeParser(g_parser); 3598 if (rc == XML_STATUS_ERROR) 3599 xml_failure(g_parser); 3600 else if (rc != XML_STATUS_OK) 3601 fail("Resume failed"); 3602 } 3603 END_TEST 3604 3605 START_TEST(test_unfinished_epilog) { 3606 const char *text = "<doc></doc><"; 3607 3608 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, 3609 "Incomplete epilog entry not faulted"); 3610 } 3611 END_TEST 3612 3613 START_TEST(test_partial_char_in_epilog) { 3614 const char *text = "<doc></doc>\xe2\x82"; 3615 3616 /* First check that no fault is raised if the parse is not finished */ 3617 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3618 == XML_STATUS_ERROR) 3619 xml_failure(g_parser); 3620 /* Now check that it is faulted once we finish */ 3621 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR) 3622 fail("Partial character in epilog not faulted"); 3623 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR) 3624 xml_failure(g_parser); 3625 } 3626 END_TEST 3627 3628 /* Test resuming a parse suspended in entity substitution */ 3629 START_TEST(test_suspend_resume_internal_entity) { 3630 const char *text 3631 = "<!DOCTYPE doc [\n" 3632 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n" 3633 "]>\n" 3634 "<doc>&foo;</doc>\n"; 3635 const XML_Char *expected1 = XCS("Hi"); 3636 const XML_Char *expected2 = XCS("HiHo"); 3637 CharData storage; 3638 3639 CharData_Init(&storage); 3640 XML_SetStartElementHandler(g_parser, start_element_suspender); 3641 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 3642 XML_SetUserData(g_parser, &storage); 3643 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 3644 // we won't know exactly how much input we actually managed to give Expat. 3645 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3646 != XML_STATUS_SUSPENDED) 3647 xml_failure(g_parser); 3648 CharData_CheckXMLChars(&storage, XCS("")); 3649 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 3650 xml_failure(g_parser); 3651 CharData_CheckXMLChars(&storage, expected1); 3652 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 3653 xml_failure(g_parser); 3654 CharData_CheckXMLChars(&storage, expected2); 3655 } 3656 END_TEST 3657 3658 START_TEST(test_suspend_resume_internal_entity_issue_629) { 3659 const char *const text 3660 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n" 3661 "<" 3662 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3663 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3664 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3665 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3666 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3667 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3668 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3669 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3670 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3671 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3672 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3673 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3674 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3675 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3676 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3677 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3678 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3679 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3680 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3681 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3682 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3683 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3684 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3685 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3686 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3687 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3688 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3689 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3690 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3691 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3692 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3693 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3694 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3695 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3696 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3697 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3698 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3699 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3700 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3701 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3702 "/>" 3703 "</b></a>"; 3704 const size_t firstChunkSizeBytes = 54; 3705 3706 XML_Parser parser = XML_ParserCreate(NULL); 3707 XML_SetUserData(parser, parser); 3708 XML_SetCommentHandler(parser, suspending_comment_handler); 3709 3710 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE) 3711 != XML_STATUS_SUSPENDED) 3712 xml_failure(parser); 3713 if (XML_ResumeParser(parser) != XML_STATUS_OK) 3714 xml_failure(parser); 3715 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes, 3716 (int)(strlen(text) - firstChunkSizeBytes), 3717 XML_TRUE) 3718 != XML_STATUS_OK) 3719 xml_failure(parser); 3720 XML_ParserFree(parser); 3721 } 3722 END_TEST 3723 3724 /* Test syntax error is caught at parse resumption */ 3725 START_TEST(test_resume_entity_with_syntax_error) { 3726 const char *text = "<!DOCTYPE doc [\n" 3727 "<!ENTITY foo '<suspend>Hi</wombat>'>\n" 3728 "]>\n" 3729 "<doc>&foo;</doc>\n"; 3730 3731 XML_SetStartElementHandler(g_parser, start_element_suspender); 3732 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3733 != XML_STATUS_SUSPENDED) 3734 xml_failure(g_parser); 3735 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) 3736 fail("Syntax error in entity not faulted"); 3737 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH) 3738 xml_failure(g_parser); 3739 } 3740 END_TEST 3741 3742 /* Test suspending and resuming in a parameter entity substitution */ 3743 START_TEST(test_suspend_resume_parameter_entity) { 3744 const char *text = "<!DOCTYPE doc [\n" 3745 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n" 3746 "%foo;\n" 3747 "]>\n" 3748 "<doc>Hello, world</doc>"; 3749 const XML_Char *expected = XCS("Hello, world"); 3750 CharData storage; 3751 3752 CharData_Init(&storage); 3753 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3754 XML_SetElementDeclHandler(g_parser, element_decl_suspender); 3755 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 3756 XML_SetUserData(g_parser, &storage); 3757 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3758 != XML_STATUS_SUSPENDED) 3759 xml_failure(g_parser); 3760 CharData_CheckXMLChars(&storage, XCS("")); 3761 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 3762 xml_failure(g_parser); 3763 CharData_CheckXMLChars(&storage, expected); 3764 } 3765 END_TEST 3766 3767 /* Test attempting to use parser after an error is faulted */ 3768 START_TEST(test_restart_on_error) { 3769 const char *text = "<$doc><doc></doc>"; 3770 3771 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3772 != XML_STATUS_ERROR) 3773 fail("Invalid tag name not faulted"); 3774 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 3775 xml_failure(g_parser); 3776 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 3777 fail("Restarting invalid parse not faulted"); 3778 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 3779 xml_failure(g_parser); 3780 } 3781 END_TEST 3782 3783 /* Test that angle brackets in an attribute default value are faulted */ 3784 START_TEST(test_reject_lt_in_attribute_value) { 3785 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n" 3786 "<doc></doc>"; 3787 3788 expect_failure(text, XML_ERROR_INVALID_TOKEN, 3789 "Bad attribute default not faulted"); 3790 } 3791 END_TEST 3792 3793 START_TEST(test_reject_unfinished_param_in_att_value) { 3794 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n" 3795 "<doc></doc>"; 3796 3797 expect_failure(text, XML_ERROR_INVALID_TOKEN, 3798 "Bad attribute default not faulted"); 3799 } 3800 END_TEST 3801 3802 START_TEST(test_trailing_cr_in_att_value) { 3803 const char *text = "<doc a='value\r'/>"; 3804 3805 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3806 == XML_STATUS_ERROR) 3807 xml_failure(g_parser); 3808 } 3809 END_TEST 3810 3811 /* Try parsing a general entity within a parameter entity in a 3812 * standalone internal DTD. Covers a corner case in the parser. 3813 */ 3814 START_TEST(test_standalone_internal_entity) { 3815 const char *text = "<?xml version='1.0' standalone='yes' ?>\n" 3816 "<!DOCTYPE doc [\n" 3817 " <!ELEMENT doc (#PCDATA)>\n" 3818 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n" 3819 " <!ENTITY ge 'AttDefaultValue'>\n" 3820 " %pe;\n" 3821 "]>\n" 3822 "<doc att2='any'/>"; 3823 3824 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3825 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3826 == XML_STATUS_ERROR) 3827 xml_failure(g_parser); 3828 } 3829 END_TEST 3830 3831 /* Test that a reference to an unknown external entity is skipped */ 3832 START_TEST(test_skipped_external_entity) { 3833 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 3834 "<doc></doc>\n"; 3835 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n" 3836 "<!ENTITY % e2 '%e1;'>\n", 3837 NULL, NULL}; 3838 3839 XML_SetUserData(g_parser, &test_data); 3840 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3841 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3842 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3843 == XML_STATUS_ERROR) 3844 xml_failure(g_parser); 3845 } 3846 END_TEST 3847 3848 /* Test a different form of unknown external entity */ 3849 START_TEST(test_skipped_null_loaded_ext_entity) { 3850 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 3851 "<doc />"; 3852 ExtHdlrData test_data 3853 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 3854 "<!ENTITY % pe2 '%pe1;'>\n" 3855 "%pe2;\n", 3856 external_entity_null_loader}; 3857 3858 XML_SetUserData(g_parser, &test_data); 3859 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3860 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 3861 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3862 == XML_STATUS_ERROR) 3863 xml_failure(g_parser); 3864 } 3865 END_TEST 3866 3867 START_TEST(test_skipped_unloaded_ext_entity) { 3868 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 3869 "<doc />"; 3870 ExtHdlrData test_data 3871 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 3872 "<!ENTITY % pe2 '%pe1;'>\n" 3873 "%pe2;\n", 3874 NULL}; 3875 3876 XML_SetUserData(g_parser, &test_data); 3877 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3878 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 3879 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3880 == XML_STATUS_ERROR) 3881 xml_failure(g_parser); 3882 } 3883 END_TEST 3884 3885 /* Test that a parameter entity value ending with a carriage return 3886 * has it translated internally into a newline. 3887 */ 3888 START_TEST(test_param_entity_with_trailing_cr) { 3889 #define PARAM_ENTITY_NAME "pe" 3890 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">" 3891 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 3892 "<doc/>"; 3893 ExtTest test_data 3894 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n" 3895 "%" PARAM_ENTITY_NAME ";\n", 3896 NULL, NULL}; 3897 3898 XML_SetUserData(g_parser, &test_data); 3899 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3900 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3901 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler); 3902 param_entity_match_init(XCS(PARAM_ENTITY_NAME), 3903 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n")); 3904 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3905 == XML_STATUS_ERROR) 3906 xml_failure(g_parser); 3907 int entity_match_flag = get_param_entity_match_flag(); 3908 if (entity_match_flag == ENTITY_MATCH_FAIL) 3909 fail("Parameter entity CR->NEWLINE conversion failed"); 3910 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) 3911 fail("Parameter entity not parsed"); 3912 } 3913 #undef PARAM_ENTITY_NAME 3914 #undef PARAM_ENTITY_CORE_VALUE 3915 END_TEST 3916 3917 START_TEST(test_invalid_character_entity) { 3918 const char *text = "<!DOCTYPE doc [\n" 3919 " <!ENTITY entity '�'>\n" 3920 "]>\n" 3921 "<doc>&entity;</doc>"; 3922 3923 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 3924 "Out of range character reference not faulted"); 3925 } 3926 END_TEST 3927 3928 START_TEST(test_invalid_character_entity_2) { 3929 const char *text = "<!DOCTYPE doc [\n" 3930 " <!ENTITY entity '&#xg0;'>\n" 3931 "]>\n" 3932 "<doc>&entity;</doc>"; 3933 3934 expect_failure(text, XML_ERROR_INVALID_TOKEN, 3935 "Out of range character reference not faulted"); 3936 } 3937 END_TEST 3938 3939 START_TEST(test_invalid_character_entity_3) { 3940 const char text[] = 3941 /* <!DOCTYPE doc [\n */ 3942 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 3943 /* U+0E04 = KHO KHWAI 3944 * U+0E08 = CHO CHAN */ 3945 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */ 3946 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " 3947 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" 3948 /* ]>\n */ 3949 "\0]\0>\0\n" 3950 /* <doc>&entity;</doc> */ 3951 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; 3952 3953 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3954 != XML_STATUS_ERROR) 3955 fail("Invalid start of entity name not faulted"); 3956 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY) 3957 xml_failure(g_parser); 3958 } 3959 END_TEST 3960 3961 START_TEST(test_invalid_character_entity_4) { 3962 const char *text = "<!DOCTYPE doc [\n" 3963 " <!ENTITY entity '�'>\n" /* = � */ 3964 "]>\n" 3965 "<doc>&entity;</doc>"; 3966 3967 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 3968 "Out of range character reference not faulted"); 3969 } 3970 END_TEST 3971 3972 /* Test that processing instructions are picked up by a default handler */ 3973 START_TEST(test_pi_handled_in_default) { 3974 const char *text = "<?test processing instruction?>\n<doc/>"; 3975 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>"); 3976 CharData storage; 3977 3978 CharData_Init(&storage); 3979 XML_SetDefaultHandler(g_parser, accumulate_characters); 3980 XML_SetUserData(g_parser, &storage); 3981 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3982 == XML_STATUS_ERROR) 3983 xml_failure(g_parser); 3984 CharData_CheckXMLChars(&storage, expected); 3985 } 3986 END_TEST 3987 3988 /* Test that comments are picked up by a default handler */ 3989 START_TEST(test_comment_handled_in_default) { 3990 const char *text = "<!-- This is a comment -->\n<doc/>"; 3991 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>"); 3992 CharData storage; 3993 3994 CharData_Init(&storage); 3995 XML_SetDefaultHandler(g_parser, accumulate_characters); 3996 XML_SetUserData(g_parser, &storage); 3997 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3998 == XML_STATUS_ERROR) 3999 xml_failure(g_parser); 4000 CharData_CheckXMLChars(&storage, expected); 4001 } 4002 END_TEST 4003 4004 /* Test PIs that look almost but not quite like XML declarations */ 4005 START_TEST(test_pi_yml) { 4006 const char *text = "<?yml something like data?><doc/>"; 4007 const XML_Char *expected = XCS("yml: something like data\n"); 4008 CharData storage; 4009 4010 CharData_Init(&storage); 4011 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4012 XML_SetUserData(g_parser, &storage); 4013 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4014 == XML_STATUS_ERROR) 4015 xml_failure(g_parser); 4016 CharData_CheckXMLChars(&storage, expected); 4017 } 4018 END_TEST 4019 4020 START_TEST(test_pi_xnl) { 4021 const char *text = "<?xnl nothing like data?><doc/>"; 4022 const XML_Char *expected = XCS("xnl: nothing like data\n"); 4023 CharData storage; 4024 4025 CharData_Init(&storage); 4026 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4027 XML_SetUserData(g_parser, &storage); 4028 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4029 == XML_STATUS_ERROR) 4030 xml_failure(g_parser); 4031 CharData_CheckXMLChars(&storage, expected); 4032 } 4033 END_TEST 4034 4035 START_TEST(test_pi_xmm) { 4036 const char *text = "<?xmm everything like data?><doc/>"; 4037 const XML_Char *expected = XCS("xmm: everything like data\n"); 4038 CharData storage; 4039 4040 CharData_Init(&storage); 4041 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4042 XML_SetUserData(g_parser, &storage); 4043 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4044 == XML_STATUS_ERROR) 4045 xml_failure(g_parser); 4046 CharData_CheckXMLChars(&storage, expected); 4047 } 4048 END_TEST 4049 4050 START_TEST(test_utf16_pi) { 4051 const char text[] = 4052 /* <?{KHO KHWAI}{CHO CHAN}?> 4053 * where {KHO KHWAI} = U+0E04 4054 * and {CHO CHAN} = U+0E08 4055 */ 4056 "<\0?\0\x04\x0e\x08\x0e?\0>\0" 4057 /* <q/> */ 4058 "<\0q\0/\0>\0"; 4059 #ifdef XML_UNICODE 4060 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4061 #else 4062 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4063 #endif 4064 CharData storage; 4065 4066 CharData_Init(&storage); 4067 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4068 XML_SetUserData(g_parser, &storage); 4069 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4070 == XML_STATUS_ERROR) 4071 xml_failure(g_parser); 4072 CharData_CheckXMLChars(&storage, expected); 4073 } 4074 END_TEST 4075 4076 START_TEST(test_utf16_be_pi) { 4077 const char text[] = 4078 /* <?{KHO KHWAI}{CHO CHAN}?> 4079 * where {KHO KHWAI} = U+0E04 4080 * and {CHO CHAN} = U+0E08 4081 */ 4082 "\0<\0?\x0e\x04\x0e\x08\0?\0>" 4083 /* <q/> */ 4084 "\0<\0q\0/\0>"; 4085 #ifdef XML_UNICODE 4086 const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4087 #else 4088 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4089 #endif 4090 CharData storage; 4091 4092 CharData_Init(&storage); 4093 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4094 XML_SetUserData(g_parser, &storage); 4095 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4096 == XML_STATUS_ERROR) 4097 xml_failure(g_parser); 4098 CharData_CheckXMLChars(&storage, expected); 4099 } 4100 END_TEST 4101 4102 /* Test that comments can be picked up and translated */ 4103 START_TEST(test_utf16_be_comment) { 4104 const char text[] = 4105 /* <!-- Comment A --> */ 4106 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" 4107 /* <doc/> */ 4108 "\0<\0d\0o\0c\0/\0>"; 4109 const XML_Char *expected = XCS(" Comment A "); 4110 CharData storage; 4111 4112 CharData_Init(&storage); 4113 XML_SetCommentHandler(g_parser, accumulate_comment); 4114 XML_SetUserData(g_parser, &storage); 4115 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4116 == XML_STATUS_ERROR) 4117 xml_failure(g_parser); 4118 CharData_CheckXMLChars(&storage, expected); 4119 } 4120 END_TEST 4121 4122 START_TEST(test_utf16_le_comment) { 4123 const char text[] = 4124 /* <!-- Comment B --> */ 4125 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" 4126 /* <doc/> */ 4127 "<\0d\0o\0c\0/\0>\0"; 4128 const XML_Char *expected = XCS(" Comment B "); 4129 CharData storage; 4130 4131 CharData_Init(&storage); 4132 XML_SetCommentHandler(g_parser, accumulate_comment); 4133 XML_SetUserData(g_parser, &storage); 4134 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4135 == XML_STATUS_ERROR) 4136 xml_failure(g_parser); 4137 CharData_CheckXMLChars(&storage, expected); 4138 } 4139 END_TEST 4140 4141 /* Test that the unknown encoding handler with map entries that expect 4142 * conversion but no conversion function is faulted 4143 */ 4144 START_TEST(test_missing_encoding_conversion_fn) { 4145 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n" 4146 "<doc>\x81</doc>"; 4147 4148 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4149 /* MiscEncodingHandler sets up an encoding with every top-bit-set 4150 * character introducing a two-byte sequence. For this, it 4151 * requires a convert function. The above function call doesn't 4152 * pass one through, so when BadEncodingHandler actually gets 4153 * called it should supply an invalid encoding. 4154 */ 4155 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4156 "Encoding with missing convert() not faulted"); 4157 } 4158 END_TEST 4159 4160 START_TEST(test_failing_encoding_conversion_fn) { 4161 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n" 4162 "<doc>\x81</doc>"; 4163 4164 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4165 /* BadEncodingHandler sets up an encoding with every top-bit-set 4166 * character introducing a two-byte sequence. For this, it 4167 * requires a convert function. The above function call passes 4168 * one that insists all possible sequences are invalid anyway. 4169 */ 4170 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4171 "Encoding with failing convert() not faulted"); 4172 } 4173 END_TEST 4174 4175 /* Test unknown encoding conversions */ 4176 START_TEST(test_unknown_encoding_success) { 4177 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4178 /* Equivalent to <eoc>Hello, world</eoc> */ 4179 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>"; 4180 4181 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4182 run_character_check(text, XCS("Hello, world")); 4183 } 4184 END_TEST 4185 4186 /* Test bad name character in unknown encoding */ 4187 START_TEST(test_unknown_encoding_bad_name) { 4188 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4189 "<\xff\x64oc>Hello, world</\xff\x64oc>"; 4190 4191 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4192 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4193 "Bad name start in unknown encoding not faulted"); 4194 } 4195 END_TEST 4196 4197 /* Test bad mid-name character in unknown encoding */ 4198 START_TEST(test_unknown_encoding_bad_name_2) { 4199 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4200 "<d\xffoc>Hello, world</d\xffoc>"; 4201 4202 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4203 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4204 "Bad name in unknown encoding not faulted"); 4205 } 4206 END_TEST 4207 4208 /* Test element name that is long enough to fill the conversion buffer 4209 * in an unknown encoding, finishing with an encoded character. 4210 */ 4211 START_TEST(test_unknown_encoding_long_name_1) { 4212 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4213 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>" 4214 "Hi" 4215 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"; 4216 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4217 CharData storage; 4218 4219 CharData_Init(&storage); 4220 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4221 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4222 XML_SetUserData(g_parser, &storage); 4223 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4224 == XML_STATUS_ERROR) 4225 xml_failure(g_parser); 4226 CharData_CheckXMLChars(&storage, expected); 4227 } 4228 END_TEST 4229 4230 /* Test element name that is long enough to fill the conversion buffer 4231 * in an unknown encoding, finishing with an simple character. 4232 */ 4233 START_TEST(test_unknown_encoding_long_name_2) { 4234 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4235 "<abcdefghabcdefghabcdefghijklmnop>" 4236 "Hi" 4237 "</abcdefghabcdefghabcdefghijklmnop>"; 4238 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4239 CharData storage; 4240 4241 CharData_Init(&storage); 4242 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4243 XML_SetStartElementHandler(g_parser, record_element_start_handler); 4244 XML_SetUserData(g_parser, &storage); 4245 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4246 == XML_STATUS_ERROR) 4247 xml_failure(g_parser); 4248 CharData_CheckXMLChars(&storage, expected); 4249 } 4250 END_TEST 4251 4252 START_TEST(test_invalid_unknown_encoding) { 4253 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n" 4254 "<doc>Hello world</doc>"; 4255 4256 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4257 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4258 "Invalid unknown encoding not faulted"); 4259 } 4260 END_TEST 4261 4262 START_TEST(test_unknown_ascii_encoding_ok) { 4263 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4264 "<doc>Hello, world</doc>"; 4265 4266 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4267 run_character_check(text, XCS("Hello, world")); 4268 } 4269 END_TEST 4270 4271 START_TEST(test_unknown_ascii_encoding_fail) { 4272 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4273 "<doc>Hello, \x80 world</doc>"; 4274 4275 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4276 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4277 "Invalid character not faulted"); 4278 } 4279 END_TEST 4280 4281 START_TEST(test_unknown_encoding_invalid_length) { 4282 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n" 4283 "<doc>Hello, world</doc>"; 4284 4285 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4286 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4287 "Invalid unknown encoding not faulted"); 4288 } 4289 END_TEST 4290 4291 START_TEST(test_unknown_encoding_invalid_topbit) { 4292 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n" 4293 "<doc>Hello, world</doc>"; 4294 4295 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4296 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4297 "Invalid unknown encoding not faulted"); 4298 } 4299 END_TEST 4300 4301 START_TEST(test_unknown_encoding_invalid_surrogate) { 4302 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n" 4303 "<doc>Hello, \x82 world</doc>"; 4304 4305 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4306 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4307 "Invalid unknown encoding not faulted"); 4308 } 4309 END_TEST 4310 4311 START_TEST(test_unknown_encoding_invalid_high) { 4312 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n" 4313 "<doc>Hello, world</doc>"; 4314 4315 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4316 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4317 "Invalid unknown encoding not faulted"); 4318 } 4319 END_TEST 4320 4321 START_TEST(test_unknown_encoding_invalid_attr_value) { 4322 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4323 "<doc attr='\xff\x30'/>"; 4324 4325 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4326 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4327 "Invalid attribute valid not faulted"); 4328 } 4329 END_TEST 4330 4331 /* Test an external entity parser set to use latin-1 detects UTF-16 4332 * BOMs correctly. 4333 */ 4334 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ 4335 START_TEST(test_ext_entity_latin1_utf16le_bom) { 4336 const char *text = "<!DOCTYPE doc [\n" 4337 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4338 "]>\n" 4339 "<doc>&en;</doc>"; 4340 ExtTest2 test_data 4341 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4342 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4343 * 0x4c = L and 0x20 is a space 4344 */ 4345 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4346 #ifdef XML_UNICODE 4347 const XML_Char *expected = XCS("\x00ff\x00feL "); 4348 #else 4349 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4350 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4351 #endif 4352 CharData storage; 4353 4354 CharData_Init(&storage); 4355 test_data.storage = &storage; 4356 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4357 XML_SetUserData(g_parser, &test_data); 4358 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4359 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4360 == XML_STATUS_ERROR) 4361 xml_failure(g_parser); 4362 CharData_CheckXMLChars(&storage, expected); 4363 } 4364 END_TEST 4365 4366 START_TEST(test_ext_entity_latin1_utf16be_bom) { 4367 const char *text = "<!DOCTYPE doc [\n" 4368 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4369 "]>\n" 4370 "<doc>&en;</doc>"; 4371 ExtTest2 test_data 4372 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4373 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4374 * 0x4c = L and 0x20 is a space 4375 */ 4376 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4377 #ifdef XML_UNICODE 4378 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4379 #else 4380 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4381 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L"); 4382 #endif 4383 CharData storage; 4384 4385 CharData_Init(&storage); 4386 test_data.storage = &storage; 4387 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4388 XML_SetUserData(g_parser, &test_data); 4389 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4390 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4391 == XML_STATUS_ERROR) 4392 xml_failure(g_parser); 4393 CharData_CheckXMLChars(&storage, expected); 4394 } 4395 END_TEST 4396 4397 /* Parsing the full buffer rather than a byte at a time makes a 4398 * difference to the encoding scanning code, so repeat the above tests 4399 * without breaking them down by byte. 4400 */ 4401 START_TEST(test_ext_entity_latin1_utf16le_bom2) { 4402 const char *text = "<!DOCTYPE doc [\n" 4403 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4404 "]>\n" 4405 "<doc>&en;</doc>"; 4406 ExtTest2 test_data 4407 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4408 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4409 * 0x4c = L and 0x20 is a space 4410 */ 4411 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4412 #ifdef XML_UNICODE 4413 const XML_Char *expected = XCS("\x00ff\x00feL "); 4414 #else 4415 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4416 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4417 #endif 4418 CharData storage; 4419 4420 CharData_Init(&storage); 4421 test_data.storage = &storage; 4422 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4423 XML_SetUserData(g_parser, &test_data); 4424 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4425 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4426 == XML_STATUS_ERROR) 4427 xml_failure(g_parser); 4428 CharData_CheckXMLChars(&storage, expected); 4429 } 4430 END_TEST 4431 4432 START_TEST(test_ext_entity_latin1_utf16be_bom2) { 4433 const char *text = "<!DOCTYPE doc [\n" 4434 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4435 "]>\n" 4436 "<doc>&en;</doc>"; 4437 ExtTest2 test_data 4438 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4439 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4440 * 0x4c = L and 0x20 is a space 4441 */ 4442 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4443 #ifdef XML_UNICODE 4444 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4445 #else 4446 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4447 const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; 4448 #endif 4449 CharData storage; 4450 4451 CharData_Init(&storage); 4452 test_data.storage = &storage; 4453 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4454 XML_SetUserData(g_parser, &test_data); 4455 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4456 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4457 == XML_STATUS_ERROR) 4458 xml_failure(g_parser); 4459 CharData_CheckXMLChars(&storage, expected); 4460 } 4461 END_TEST 4462 4463 /* Test little-endian UTF-16 given an explicit big-endian encoding */ 4464 START_TEST(test_ext_entity_utf16_be) { 4465 const char *text = "<!DOCTYPE doc [\n" 4466 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4467 "]>\n" 4468 "<doc>&en;</doc>"; 4469 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL}; 4470 #ifdef XML_UNICODE 4471 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4472 #else 4473 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4474 "\xe6\x94\x80" /* U+6500 */ 4475 "\xe2\xbc\x80" /* U+2F00 */ 4476 "\xe3\xb8\x80"); /* U+3E00 */ 4477 #endif 4478 CharData storage; 4479 4480 CharData_Init(&storage); 4481 test_data.storage = &storage; 4482 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4483 XML_SetUserData(g_parser, &test_data); 4484 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4485 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4486 == XML_STATUS_ERROR) 4487 xml_failure(g_parser); 4488 CharData_CheckXMLChars(&storage, expected); 4489 } 4490 END_TEST 4491 4492 /* Test big-endian UTF-16 given an explicit little-endian encoding */ 4493 START_TEST(test_ext_entity_utf16_le) { 4494 const char *text = "<!DOCTYPE doc [\n" 4495 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4496 "]>\n" 4497 "<doc>&en;</doc>"; 4498 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL}; 4499 #ifdef XML_UNICODE 4500 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4501 #else 4502 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4503 "\xe6\x94\x80" /* U+6500 */ 4504 "\xe2\xbc\x80" /* U+2F00 */ 4505 "\xe3\xb8\x80"); /* U+3E00 */ 4506 #endif 4507 CharData storage; 4508 4509 CharData_Init(&storage); 4510 test_data.storage = &storage; 4511 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4512 XML_SetUserData(g_parser, &test_data); 4513 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4514 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4515 == XML_STATUS_ERROR) 4516 xml_failure(g_parser); 4517 CharData_CheckXMLChars(&storage, expected); 4518 } 4519 END_TEST 4520 4521 /* Test little-endian UTF-16 given no explicit encoding. 4522 * The existing default encoding (UTF-8) is assumed to hold without a 4523 * BOM to contradict it, so the entity value will in fact provoke an 4524 * error because 0x00 is not a valid XML character. We parse the 4525 * whole buffer in one go rather than feeding it in byte by byte to 4526 * exercise different code paths in the initial scanning routines. 4527 */ 4528 START_TEST(test_ext_entity_utf16_unknown) { 4529 const char *text = "<!DOCTYPE doc [\n" 4530 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4531 "]>\n" 4532 "<doc>&en;</doc>"; 4533 ExtFaults2 test_data 4534 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL, 4535 XML_ERROR_INVALID_TOKEN}; 4536 4537 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2); 4538 XML_SetUserData(g_parser, &test_data); 4539 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 4540 "Invalid character should not have been accepted"); 4541 } 4542 END_TEST 4543 4544 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */ 4545 START_TEST(test_ext_entity_utf8_non_bom) { 4546 const char *text = "<!DOCTYPE doc [\n" 4547 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4548 "]>\n" 4549 "<doc>&en;</doc>"; 4550 ExtTest2 test_data 4551 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */ 4552 3, NULL, NULL}; 4553 #ifdef XML_UNICODE 4554 const XML_Char *expected = XCS("\xfec0"); 4555 #else 4556 const XML_Char *expected = XCS("\xef\xbb\x80"); 4557 #endif 4558 CharData storage; 4559 4560 CharData_Init(&storage); 4561 test_data.storage = &storage; 4562 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4563 XML_SetUserData(g_parser, &test_data); 4564 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4565 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4566 == XML_STATUS_ERROR) 4567 xml_failure(g_parser); 4568 CharData_CheckXMLChars(&storage, expected); 4569 } 4570 END_TEST 4571 4572 /* Test that UTF-8 in a CDATA section is correctly passed through */ 4573 START_TEST(test_utf8_in_cdata_section) { 4574 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>"; 4575 #ifdef XML_UNICODE 4576 const XML_Char *expected = XCS("one \x00e9 two"); 4577 #else 4578 const XML_Char *expected = XCS("one \xc3\xa9 two"); 4579 #endif 4580 4581 run_character_check(text, expected); 4582 } 4583 END_TEST 4584 4585 /* Test that little-endian UTF-16 in a CDATA section is handled */ 4586 START_TEST(test_utf8_in_cdata_section_2) { 4587 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>"; 4588 #ifdef XML_UNICODE 4589 const XML_Char *expected = XCS("\x00e9]\x00e9two"); 4590 #else 4591 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two"); 4592 #endif 4593 4594 run_character_check(text, expected); 4595 } 4596 END_TEST 4597 4598 START_TEST(test_utf8_in_start_tags) { 4599 struct test_case { 4600 bool goodName; 4601 bool goodNameStart; 4602 const char *tagName; 4603 }; 4604 4605 // The idea with the tests below is this: 4606 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences 4607 // go to isNever and are hence not a concern. 4608 // 4609 // We start with a character that is a valid name character 4610 // (or even name-start character, see XML 1.0r4 spec) and then we flip 4611 // single bits at places where (1) the result leaves the UTF-8 encoding space 4612 // and (2) we stay in the same n-byte sequence family. 4613 // 4614 // The flipped bits are highlighted in angle brackets in comments, 4615 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped 4616 // the most significant bit to 1 to leave UTF-8 encoding space. 4617 struct test_case cases[] = { 4618 // 1-byte UTF-8: [0xxx xxxx] 4619 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' 4620 {false, false, "\xBA"}, // [<1>011 1010] 4621 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' 4622 {false, false, "\xB9"}, // [<1>011 1001] 4623 4624 // 2-byte UTF-8: [110x xxxx] [10xx xxxx] 4625 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = 4626 // Arabic small waw U+06E5 4627 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] 4628 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] 4629 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] 4630 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = 4631 // combining char U+0301 4632 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] 4633 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] 4634 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] 4635 4636 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] 4637 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = 4638 // Devanagari Letter A U+0905 4639 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] 4640 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] 4641 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] 4642 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] 4643 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] 4644 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = 4645 // combining char U+0901 4646 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] 4647 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] 4648 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] 4649 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] 4650 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] 4651 }; 4652 const bool atNameStart[] = {true, false}; 4653 4654 size_t i = 0; 4655 char doc[1024]; 4656 size_t failCount = 0; 4657 4658 // we need all the bytes to be parsed, but we don't want the errors that can 4659 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. 4660 if (g_reparseDeferralEnabledDefault) { 4661 return; 4662 } 4663 4664 for (; i < sizeof(cases) / sizeof(cases[0]); i++) { 4665 size_t j = 0; 4666 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { 4667 const bool expectedSuccess 4668 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; 4669 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a", 4670 cases[i].tagName); 4671 XML_Parser parser = XML_ParserCreate(NULL); 4672 4673 const enum XML_Status status = _XML_Parse_SINGLE_BYTES( 4674 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); 4675 4676 bool success = true; 4677 if ((status == XML_STATUS_OK) != expectedSuccess) { 4678 success = false; 4679 } 4680 if ((status == XML_STATUS_ERROR) 4681 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { 4682 success = false; 4683 } 4684 4685 if (! success) { 4686 fprintf( 4687 stderr, 4688 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", 4689 (unsigned)i + 1u, atNameStart[j] ? " " : "not ", 4690 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); 4691 failCount++; 4692 } 4693 4694 XML_ParserFree(parser); 4695 } 4696 } 4697 4698 if (failCount > 0) { 4699 fail("UTF-8 regression detected"); 4700 } 4701 } 4702 END_TEST 4703 4704 /* Test trailing spaces in elements are accepted */ 4705 START_TEST(test_trailing_spaces_in_elements) { 4706 const char *text = "<doc >Hi</doc >"; 4707 const XML_Char *expected = XCS("doc/doc"); 4708 CharData storage; 4709 4710 CharData_Init(&storage); 4711 XML_SetElementHandler(g_parser, record_element_start_handler, 4712 record_element_end_handler); 4713 XML_SetUserData(g_parser, &storage); 4714 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4715 == XML_STATUS_ERROR) 4716 xml_failure(g_parser); 4717 CharData_CheckXMLChars(&storage, expected); 4718 } 4719 END_TEST 4720 4721 START_TEST(test_utf16_attribute) { 4722 const char text[] = 4723 /* <d {KHO KHWAI}{CHO CHAN}='a'/> 4724 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4725 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4726 */ 4727 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0"; 4728 const XML_Char *expected = XCS("a"); 4729 CharData storage; 4730 4731 CharData_Init(&storage); 4732 XML_SetStartElementHandler(g_parser, accumulate_attribute); 4733 XML_SetUserData(g_parser, &storage); 4734 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4735 == XML_STATUS_ERROR) 4736 xml_failure(g_parser); 4737 CharData_CheckXMLChars(&storage, expected); 4738 } 4739 END_TEST 4740 4741 START_TEST(test_utf16_second_attr) { 4742 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/> 4743 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4744 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4745 */ 4746 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0" 4747 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0"; 4748 const XML_Char *expected = XCS("1"); 4749 CharData storage; 4750 4751 CharData_Init(&storage); 4752 XML_SetStartElementHandler(g_parser, accumulate_attribute); 4753 XML_SetUserData(g_parser, &storage); 4754 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4755 == XML_STATUS_ERROR) 4756 xml_failure(g_parser); 4757 CharData_CheckXMLChars(&storage, expected); 4758 } 4759 END_TEST 4760 4761 START_TEST(test_attr_after_solidus) { 4762 const char *text = "<doc attr1='a' / attr2='b'>"; 4763 4764 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted"); 4765 } 4766 END_TEST 4767 4768 START_TEST(test_utf16_pe) { 4769 /* <!DOCTYPE doc [ 4770 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'> 4771 * %{KHO KHWAI}{CHO CHAN}; 4772 * ]> 4773 * <doc></doc> 4774 * 4775 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4776 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4777 */ 4778 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 4779 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 " 4780 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 " 4781 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n" 4782 "\0%\x0e\x04\x0e\x08\0;\0\n" 4783 "\0]\0>\0\n" 4784 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>"; 4785 #ifdef XML_UNICODE 4786 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n"); 4787 #else 4788 const XML_Char *expected 4789 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n"); 4790 #endif 4791 CharData storage; 4792 4793 CharData_Init(&storage); 4794 XML_SetUserData(g_parser, &storage); 4795 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl); 4796 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4797 == XML_STATUS_ERROR) 4798 xml_failure(g_parser); 4799 CharData_CheckXMLChars(&storage, expected); 4800 } 4801 END_TEST 4802 4803 /* Test that duff attribute description keywords are rejected */ 4804 START_TEST(test_bad_attr_desc_keyword) { 4805 const char *text = "<!DOCTYPE doc [\n" 4806 " <!ATTLIST doc attr CDATA #!IMPLIED>\n" 4807 "]>\n" 4808 "<doc />"; 4809 4810 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4811 "Bad keyword !IMPLIED not faulted"); 4812 } 4813 END_TEST 4814 4815 /* Test that an invalid attribute description keyword consisting of 4816 * UTF-16 characters with their top bytes non-zero are correctly 4817 * faulted 4818 */ 4819 START_TEST(test_bad_attr_desc_keyword_utf16) { 4820 /* <!DOCTYPE d [ 4821 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}> 4822 * ]><d/> 4823 * 4824 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4825 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4826 */ 4827 const char text[] 4828 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 4829 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 " 4830 "\0#\x0e\x04\x0e\x08\0>\0\n" 4831 "\0]\0>\0<\0d\0/\0>"; 4832 4833 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4834 != XML_STATUS_ERROR) 4835 fail("Invalid UTF16 attribute keyword not faulted"); 4836 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 4837 xml_failure(g_parser); 4838 } 4839 END_TEST 4840 4841 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this 4842 * using prefix-encoding (see above) to trigger specific code paths 4843 */ 4844 START_TEST(test_bad_doctype) { 4845 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4846 "<!DOCTYPE doc [ \x80\x44 ]><doc/>"; 4847 4848 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4849 expect_failure(text, XML_ERROR_SYNTAX, 4850 "Invalid bytes in DOCTYPE not faulted"); 4851 } 4852 END_TEST 4853 4854 START_TEST(test_bad_doctype_utf8) { 4855 const char *text = "<!DOCTYPE \xDB\x25" 4856 "doc><doc/>"; // [1101 1011] [<0>010 0101] 4857 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4858 "Invalid UTF-8 in DOCTYPE not faulted"); 4859 } 4860 END_TEST 4861 4862 START_TEST(test_bad_doctype_utf16) { 4863 const char text[] = 4864 /* <!DOCTYPE doc [ \x06f2 ]><doc/> 4865 * 4866 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number 4867 * (name character) but not a valid letter (name start character) 4868 */ 4869 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 " 4870 "\x06\xf2" 4871 "\0 \0]\0>\0<\0d\0o\0c\0/\0>"; 4872 4873 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4874 != XML_STATUS_ERROR) 4875 fail("Invalid bytes in DOCTYPE not faulted"); 4876 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 4877 xml_failure(g_parser); 4878 } 4879 END_TEST 4880 4881 START_TEST(test_bad_doctype_plus) { 4882 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n" 4883 "<1+>&foo;</1+>"; 4884 4885 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4886 "'+' in document name not faulted"); 4887 } 4888 END_TEST 4889 4890 START_TEST(test_bad_doctype_star) { 4891 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n" 4892 "<1*>&foo;</1*>"; 4893 4894 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4895 "'*' in document name not faulted"); 4896 } 4897 END_TEST 4898 4899 START_TEST(test_bad_doctype_query) { 4900 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n" 4901 "<1?>&foo;</1?>"; 4902 4903 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4904 "'?' in document name not faulted"); 4905 } 4906 END_TEST 4907 4908 START_TEST(test_unknown_encoding_bad_ignore) { 4909 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>" 4910 "<!DOCTYPE doc SYSTEM 'foo'>" 4911 "<doc><e>&entity;</e></doc>"; 4912 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>", 4913 "Invalid character not faulted", XCS("prefix-conv"), 4914 XML_ERROR_INVALID_TOKEN}; 4915 4916 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4917 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4918 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 4919 XML_SetUserData(g_parser, &fault); 4920 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 4921 "Bad IGNORE section with unknown encoding not failed"); 4922 } 4923 END_TEST 4924 4925 START_TEST(test_entity_in_utf16_be_attr) { 4926 const char text[] = 4927 /* <e a='ä ä'></e> */ 4928 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 " 4929 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>"; 4930 #ifdef XML_UNICODE 4931 const XML_Char *expected = XCS("\x00e4 \x00e4"); 4932 #else 4933 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 4934 #endif 4935 CharData storage; 4936 4937 CharData_Init(&storage); 4938 XML_SetUserData(g_parser, &storage); 4939 XML_SetStartElementHandler(g_parser, accumulate_attribute); 4940 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4941 == XML_STATUS_ERROR) 4942 xml_failure(g_parser); 4943 CharData_CheckXMLChars(&storage, expected); 4944 } 4945 END_TEST 4946 4947 START_TEST(test_entity_in_utf16_le_attr) { 4948 const char text[] = 4949 /* <e a='ä ä'></e> */ 4950 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0" 4951 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0"; 4952 #ifdef XML_UNICODE 4953 const XML_Char *expected = XCS("\x00e4 \x00e4"); 4954 #else 4955 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 4956 #endif 4957 CharData storage; 4958 4959 CharData_Init(&storage); 4960 XML_SetUserData(g_parser, &storage); 4961 XML_SetStartElementHandler(g_parser, accumulate_attribute); 4962 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4963 == XML_STATUS_ERROR) 4964 xml_failure(g_parser); 4965 CharData_CheckXMLChars(&storage, expected); 4966 } 4967 END_TEST 4968 4969 START_TEST(test_entity_public_utf16_be) { 4970 const char text[] = 4971 /* <!DOCTYPE d [ */ 4972 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 4973 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 4974 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 " 4975 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n" 4976 /* %e; */ 4977 "\0%\0e\0;\0\n" 4978 /* ]> */ 4979 "\0]\0>\0\n" 4980 /* <d>&j;</d> */ 4981 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>"; 4982 ExtTest2 test_data 4983 = {/* <!ENTITY j 'baz'> */ 4984 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL}; 4985 const XML_Char *expected = XCS("baz"); 4986 CharData storage; 4987 4988 CharData_Init(&storage); 4989 test_data.storage = &storage; 4990 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4991 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4992 XML_SetUserData(g_parser, &test_data); 4993 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4994 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4995 == XML_STATUS_ERROR) 4996 xml_failure(g_parser); 4997 CharData_CheckXMLChars(&storage, expected); 4998 } 4999 END_TEST 5000 5001 START_TEST(test_entity_public_utf16_le) { 5002 const char text[] = 5003 /* <!DOCTYPE d [ */ 5004 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0" 5005 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5006 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0" 5007 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0" 5008 /* %e; */ 5009 "%\0e\0;\0\n\0" 5010 /* ]> */ 5011 "]\0>\0\n\0" 5012 /* <d>&j;</d> */ 5013 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0"; 5014 ExtTest2 test_data 5015 = {/* <!ENTITY j 'baz'> */ 5016 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL}; 5017 const XML_Char *expected = XCS("baz"); 5018 CharData storage; 5019 5020 CharData_Init(&storage); 5021 test_data.storage = &storage; 5022 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5023 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5024 XML_SetUserData(g_parser, &test_data); 5025 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5026 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5027 == XML_STATUS_ERROR) 5028 xml_failure(g_parser); 5029 CharData_CheckXMLChars(&storage, expected); 5030 } 5031 END_TEST 5032 5033 /* Test that a doctype with neither an internal nor external subset is 5034 * faulted 5035 */ 5036 START_TEST(test_short_doctype) { 5037 const char *text = "<!DOCTYPE doc></doc>"; 5038 expect_failure(text, XML_ERROR_INVALID_TOKEN, 5039 "DOCTYPE without subset not rejected"); 5040 } 5041 END_TEST 5042 5043 START_TEST(test_short_doctype_2) { 5044 const char *text = "<!DOCTYPE doc PUBLIC></doc>"; 5045 expect_failure(text, XML_ERROR_SYNTAX, 5046 "DOCTYPE without Public ID not rejected"); 5047 } 5048 END_TEST 5049 5050 START_TEST(test_short_doctype_3) { 5051 const char *text = "<!DOCTYPE doc SYSTEM></doc>"; 5052 expect_failure(text, XML_ERROR_SYNTAX, 5053 "DOCTYPE without System ID not rejected"); 5054 } 5055 END_TEST 5056 5057 START_TEST(test_long_doctype) { 5058 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>"; 5059 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected"); 5060 } 5061 END_TEST 5062 5063 START_TEST(test_bad_entity) { 5064 const char *text = "<!DOCTYPE doc [\n" 5065 " <!ENTITY foo PUBLIC>\n" 5066 "]>\n" 5067 "<doc/>"; 5068 expect_failure(text, XML_ERROR_SYNTAX, 5069 "ENTITY without Public ID is not rejected"); 5070 } 5071 END_TEST 5072 5073 /* Test unquoted value is faulted */ 5074 START_TEST(test_bad_entity_2) { 5075 const char *text = "<!DOCTYPE doc [\n" 5076 " <!ENTITY % foo bar>\n" 5077 "]>\n" 5078 "<doc/>"; 5079 expect_failure(text, XML_ERROR_SYNTAX, 5080 "ENTITY without Public ID is not rejected"); 5081 } 5082 END_TEST 5083 5084 START_TEST(test_bad_entity_3) { 5085 const char *text = "<!DOCTYPE doc [\n" 5086 " <!ENTITY % foo PUBLIC>\n" 5087 "]>\n" 5088 "<doc/>"; 5089 expect_failure(text, XML_ERROR_SYNTAX, 5090 "Parameter ENTITY without Public ID is not rejected"); 5091 } 5092 END_TEST 5093 5094 START_TEST(test_bad_entity_4) { 5095 const char *text = "<!DOCTYPE doc [\n" 5096 " <!ENTITY % foo SYSTEM>\n" 5097 "]>\n" 5098 "<doc/>"; 5099 expect_failure(text, XML_ERROR_SYNTAX, 5100 "Parameter ENTITY without Public ID is not rejected"); 5101 } 5102 END_TEST 5103 5104 START_TEST(test_bad_notation) { 5105 const char *text = "<!DOCTYPE doc [\n" 5106 " <!NOTATION n SYSTEM>\n" 5107 "]>\n" 5108 "<doc/>"; 5109 expect_failure(text, XML_ERROR_SYNTAX, 5110 "Notation without System ID is not rejected"); 5111 } 5112 END_TEST 5113 5114 /* Test for issue #11, wrongly suppressed default handler */ 5115 START_TEST(test_default_doctype_handler) { 5116 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n" 5117 " <!ENTITY foo 'bar'>\n" 5118 "]>\n" 5119 "<doc>&foo;</doc>"; 5120 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE}, 5121 {XCS("'test.dtd'"), 10, XML_FALSE}, 5122 {NULL, 0, XML_FALSE}}; 5123 int i; 5124 5125 XML_SetUserData(g_parser, &test_data); 5126 XML_SetDefaultHandler(g_parser, checking_default_handler); 5127 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 5128 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5129 == XML_STATUS_ERROR) 5130 xml_failure(g_parser); 5131 for (i = 0; test_data[i].expected != NULL; i++) 5132 if (! test_data[i].seen) 5133 fail("Default handler not run for public !DOCTYPE"); 5134 } 5135 END_TEST 5136 5137 START_TEST(test_empty_element_abort) { 5138 const char *text = "<abort/>"; 5139 5140 XML_SetStartElementHandler(g_parser, start_element_suspender); 5141 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5142 != XML_STATUS_ERROR) 5143 fail("Expected to error on abort"); 5144 } 5145 END_TEST 5146 5147 /* Regression test for GH issue #612: unfinished m_declAttributeType 5148 * allocation in ->m_tempPool can corrupt following allocation. 5149 */ 5150 START_TEST(test_pool_integrity_with_unfinished_attr) { 5151 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n" 5152 "<!DOCTYPE foo [\n" 5153 "<!ELEMENT foo ANY>\n" 5154 "<!ENTITY % entp SYSTEM \"external.dtd\">\n" 5155 "%entp;\n" 5156 "]>\n" 5157 "<a></a>\n"; 5158 const XML_Char *expected = XCS("COMMENT"); 5159 CharData storage; 5160 5161 CharData_Init(&storage); 5162 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5163 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist); 5164 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 5165 XML_SetCommentHandler(g_parser, accumulate_comment); 5166 XML_SetUserData(g_parser, &storage); 5167 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5168 == XML_STATUS_ERROR) 5169 xml_failure(g_parser); 5170 CharData_CheckXMLChars(&storage, expected); 5171 } 5172 END_TEST 5173 5174 START_TEST(test_nested_entity_suspend) { 5175 const char *const text = "<!DOCTYPE a [\n" 5176 " <!ENTITY e1 '<!--e1-->'>\n" 5177 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n" 5178 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n" 5179 "]>\n" 5180 "<a><!--start-->&e3;<!--end--></a>"; 5181 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head") 5182 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end"); 5183 CharData storage; 5184 CharData_Init(&storage); 5185 XML_Parser parser = XML_ParserCreate(NULL); 5186 ParserPlusStorage parserPlusStorage = {parser, &storage}; 5187 5188 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5189 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler); 5190 XML_SetUserData(parser, &parserPlusStorage); 5191 5192 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5193 while (status == XML_STATUS_SUSPENDED) { 5194 status = XML_ResumeParser(parser); 5195 } 5196 if (status != XML_STATUS_OK) 5197 xml_failure(parser); 5198 5199 CharData_CheckXMLChars(&storage, expected); 5200 XML_ParserFree(parser); 5201 } 5202 END_TEST 5203 5204 /* Regression test for quadratic parsing on large tokens */ 5205 START_TEST(test_big_tokens_take_linear_time) { 5206 const char *const too_slow_failure_message 5207 = "Compared to the baseline runtime of the first test, this test has a " 5208 "slowdown of more than <max_slowdown>. " 5209 "Please keep increasing the value by 1 until it reliably passes the " 5210 "test on your hardware and open a bug sharing that number with us. " 5211 "Thanks in advance!"; 5212 const struct { 5213 const char *pre; 5214 const char *post; 5215 } text[] = { 5216 {"<a>", "</a>"}, // assumed good, used as baseline 5217 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch 5218 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²) 5219 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²) 5220 {"<e><", "/></e>"}, // big elem name, used to be O(N²) 5221 }; 5222 const int num_cases = sizeof(text) / sizeof(text[0]); 5223 // For the test we need a <max_slowdown> value that is: 5224 // (1) big enough that the test passes reliably (avoiding flaky tests), and 5225 // (2) small enough that the test actually catches regressions. 5226 const int max_slowdown = 15; 5227 char aaaaaa[4096]; 5228 const int fillsize = (int)sizeof(aaaaaa); 5229 const int fillcount = 100; 5230 5231 memset(aaaaaa, 'a', fillsize); 5232 5233 if (! g_reparseDeferralEnabledDefault) { 5234 return; // heuristic is disabled; we would get O(n^2) and fail. 5235 } 5236 #if ! defined(__linux__) 5237 if (CLOCKS_PER_SEC < 100000) { 5238 // Skip this test if clock() doesn't have reasonably good resolution. 5239 // This workaround is primarily targeting Windows and FreeBSD, since 5240 // XSI requires the value to be 1.000.000 (10x the condition here), and 5241 // we want to be very sure that at least one platform in CI can catch 5242 // regressions (through a failing test). 5243 return; 5244 } 5245 #endif 5246 5247 clock_t baseline = 0; 5248 for (int i = 0; i < num_cases; ++i) { 5249 XML_Parser parser = XML_ParserCreate(NULL); 5250 assert_true(parser != NULL); 5251 enum XML_Status status; 5252 set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown, 5253 text[i].pre, text[i].post); 5254 const clock_t start = clock(); 5255 5256 // parse the start text 5257 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, 5258 (int)strlen(text[i].pre), XML_FALSE); 5259 if (status != XML_STATUS_OK) { 5260 xml_failure(parser); 5261 } 5262 // parse lots of 'a', failing the test early if it takes too long 5263 for (int f = 0; f < fillcount; ++f) { 5264 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); 5265 if (status != XML_STATUS_OK) { 5266 xml_failure(parser); 5267 } 5268 // i == 0 means we're still calculating the baseline value 5269 if (i > 0) { 5270 const clock_t now = clock(); 5271 const clock_t clocks_so_far = now - start; 5272 const int slowdown = clocks_so_far / baseline; 5273 if (slowdown >= max_slowdown) { 5274 fprintf( 5275 stderr, 5276 "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n", 5277 f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown); 5278 fail(too_slow_failure_message); 5279 } 5280 } 5281 } 5282 // parse the end text 5283 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, 5284 (int)strlen(text[i].post), XML_TRUE); 5285 if (status != XML_STATUS_OK) { 5286 xml_failure(parser); 5287 } 5288 5289 // how long did it take in total? 5290 const clock_t end = clock(); 5291 const clock_t taken = end - start; 5292 if (i == 0) { 5293 assert_true(taken > 0); // just to make sure we don't div-by-0 later 5294 baseline = taken; 5295 } 5296 const int slowdown = taken / baseline; 5297 if (slowdown >= max_slowdown) { 5298 fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n", 5299 (int)taken, (int)baseline, slowdown, max_slowdown); 5300 fail(too_slow_failure_message); 5301 } 5302 5303 XML_ParserFree(parser); 5304 } 5305 } 5306 END_TEST 5307 5308 START_TEST(test_set_reparse_deferral) { 5309 const char *const pre = "<d>"; 5310 const char *const start = "<x attr='"; 5311 const char *const end = "'></x>"; 5312 char eeeeee[100]; 5313 const int fillsize = (int)sizeof(eeeeee); 5314 memset(eeeeee, 'e', fillsize); 5315 5316 for (int enabled = 0; enabled <= 1; enabled += 1) { 5317 set_subtest("deferral=%d", enabled); 5318 5319 XML_Parser parser = XML_ParserCreate(NULL); 5320 assert_true(parser != NULL); 5321 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5322 // pre-grow the buffer to avoid reparsing due to almost-fullness 5323 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5324 5325 CharData storage; 5326 CharData_Init(&storage); 5327 XML_SetUserData(parser, &storage); 5328 XML_SetStartElementHandler(parser, start_element_event_handler); 5329 5330 enum XML_Status status; 5331 // parse the start text 5332 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5333 if (status != XML_STATUS_OK) { 5334 xml_failure(parser); 5335 } 5336 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5337 5338 // ..and the start of the token 5339 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5340 if (status != XML_STATUS_OK) { 5341 xml_failure(parser); 5342 } 5343 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one 5344 5345 // try to parse lots of 'e', but the token isn't finished 5346 for (int c = 0; c < 100; ++c) { 5347 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5348 if (status != XML_STATUS_OK) { 5349 xml_failure(parser); 5350 } 5351 } 5352 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 5353 5354 // end the <x> token. 5355 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5356 if (status != XML_STATUS_OK) { 5357 xml_failure(parser); 5358 } 5359 5360 if (enabled) { 5361 // In general, we may need to push more data to trigger a reparse attempt, 5362 // but in this test, the data is constructed to always require it. 5363 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect 5364 // 2x the token length should suffice; the +1 covers the start and end. 5365 for (int c = 0; c < 101; ++c) { 5366 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5367 if (status != XML_STATUS_OK) { 5368 xml_failure(parser); 5369 } 5370 } 5371 } 5372 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done 5373 5374 XML_ParserFree(parser); 5375 } 5376 } 5377 END_TEST 5378 5379 struct element_decl_data { 5380 XML_Parser parser; 5381 int count; 5382 }; 5383 5384 static void 5385 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { 5386 UNUSED_P(name); 5387 struct element_decl_data *testdata = (struct element_decl_data *)userData; 5388 testdata->count += 1; 5389 XML_FreeContentModel(testdata->parser, model); 5390 } 5391 5392 static int 5393 external_inherited_parser(XML_Parser p, const XML_Char *context, 5394 const XML_Char *base, const XML_Char *systemId, 5395 const XML_Char *publicId) { 5396 UNUSED_P(base); 5397 UNUSED_P(systemId); 5398 UNUSED_P(publicId); 5399 const char *const pre = "<!ELEMENT document ANY>\n"; 5400 const char *const start = "<!ELEMENT "; 5401 const char *const end = " ANY>\n"; 5402 const char *const post = "<!ELEMENT xyz ANY>\n"; 5403 const int enabled = *(int *)XML_GetUserData(p); 5404 char eeeeee[100]; 5405 char spaces[100]; 5406 const int fillsize = (int)sizeof(eeeeee); 5407 assert_true(fillsize == (int)sizeof(spaces)); 5408 memset(eeeeee, 'e', fillsize); 5409 memset(spaces, ' ', fillsize); 5410 5411 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); 5412 assert_true(parser != NULL); 5413 // pre-grow the buffer to avoid reparsing due to almost-fullness 5414 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5415 5416 struct element_decl_data testdata; 5417 testdata.parser = parser; 5418 testdata.count = 0; 5419 XML_SetUserData(parser, &testdata); 5420 XML_SetElementDeclHandler(parser, element_decl_counter); 5421 5422 enum XML_Status status; 5423 // parse the initial text 5424 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5425 if (status != XML_STATUS_OK) { 5426 xml_failure(parser); 5427 } 5428 assert_true(testdata.count == 1); // first element should be done 5429 5430 // ..and the start of the big token 5431 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5432 if (status != XML_STATUS_OK) { 5433 xml_failure(parser); 5434 } 5435 assert_true(testdata.count == 1); // still just the first one 5436 5437 // try to parse lots of 'e', but the token isn't finished 5438 for (int c = 0; c < 100; ++c) { 5439 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5440 if (status != XML_STATUS_OK) { 5441 xml_failure(parser); 5442 } 5443 } 5444 assert_true(testdata.count == 1); // *still* just the first one 5445 5446 // end the big token. 5447 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5448 if (status != XML_STATUS_OK) { 5449 xml_failure(parser); 5450 } 5451 5452 if (enabled) { 5453 // In general, we may need to push more data to trigger a reparse attempt, 5454 // but in this test, the data is constructed to always require it. 5455 assert_true(testdata.count == 1); // or the test is incorrect 5456 // 2x the token length should suffice; the +1 covers the start and end. 5457 for (int c = 0; c < 101; ++c) { 5458 status = XML_Parse(parser, spaces, fillsize, XML_FALSE); 5459 if (status != XML_STATUS_OK) { 5460 xml_failure(parser); 5461 } 5462 } 5463 } 5464 assert_true(testdata.count == 2); // the big token should be done 5465 5466 // parse the final text 5467 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); 5468 if (status != XML_STATUS_OK) { 5469 xml_failure(parser); 5470 } 5471 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done 5472 5473 XML_ParserFree(parser); 5474 return XML_STATUS_OK; 5475 } 5476 5477 START_TEST(test_reparse_deferral_is_inherited) { 5478 const char *const text 5479 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>"; 5480 for (int enabled = 0; enabled <= 1; ++enabled) { 5481 set_subtest("deferral=%d", enabled); 5482 5483 XML_Parser parser = XML_ParserCreate(NULL); 5484 assert_true(parser != NULL); 5485 XML_SetUserData(parser, (void *)&enabled); 5486 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5487 // this handler creates a sub-parser and checks that its deferral behavior 5488 // is what we expected, based on the value of `enabled` (in userdata). 5489 XML_SetExternalEntityRefHandler(parser, external_inherited_parser); 5490 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5491 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 5492 xml_failure(parser); 5493 5494 XML_ParserFree(parser); 5495 } 5496 } 5497 END_TEST 5498 5499 START_TEST(test_set_reparse_deferral_on_null_parser) { 5500 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); 5501 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); 5502 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); 5503 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); 5504 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) 5505 == XML_FALSE); 5506 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) 5507 == XML_FALSE); 5508 } 5509 END_TEST 5510 5511 START_TEST(test_set_reparse_deferral_on_the_fly) { 5512 const char *const pre = "<d><x attr='"; 5513 const char *const end = "'></x>"; 5514 char iiiiii[100]; 5515 const int fillsize = (int)sizeof(iiiiii); 5516 memset(iiiiii, 'i', fillsize); 5517 5518 XML_Parser parser = XML_ParserCreate(NULL); 5519 assert_true(parser != NULL); 5520 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); 5521 5522 CharData storage; 5523 CharData_Init(&storage); 5524 XML_SetUserData(parser, &storage); 5525 XML_SetStartElementHandler(parser, start_element_event_handler); 5526 5527 enum XML_Status status; 5528 // parse the start text 5529 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5530 if (status != XML_STATUS_OK) { 5531 xml_failure(parser); 5532 } 5533 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5534 5535 // try to parse some 'i', but the token isn't finished 5536 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); 5537 if (status != XML_STATUS_OK) { 5538 xml_failure(parser); 5539 } 5540 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 5541 5542 // end the <x> token. 5543 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5544 if (status != XML_STATUS_OK) { 5545 xml_failure(parser); 5546 } 5547 CharData_CheckXMLChars(&storage, XCS("d")); // not yet. 5548 5549 // now change the heuristic setting and add *no* data 5550 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); 5551 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. 5552 status = XML_Parse(parser, "", 0, XML_FALSE); 5553 if (status != XML_STATUS_OK) { 5554 xml_failure(parser); 5555 } 5556 CharData_CheckXMLChars(&storage, XCS("dx")); 5557 5558 XML_ParserFree(parser); 5559 } 5560 END_TEST 5561 5562 START_TEST(test_set_bad_reparse_option) { 5563 XML_Parser parser = XML_ParserCreate(NULL); 5564 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); 5565 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); 5566 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); 5567 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); 5568 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); 5569 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); 5570 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); 5571 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); 5572 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); 5573 XML_ParserFree(parser); 5574 } 5575 END_TEST 5576 5577 static size_t g_totalAlloc = 0; 5578 static size_t g_biggestAlloc = 0; 5579 5580 static void * 5581 counting_realloc(void *ptr, size_t size) { 5582 g_totalAlloc += size; 5583 if (size > g_biggestAlloc) { 5584 g_biggestAlloc = size; 5585 } 5586 return realloc(ptr, size); 5587 } 5588 5589 static void * 5590 counting_malloc(size_t size) { 5591 return counting_realloc(NULL, size); 5592 } 5593 5594 START_TEST(test_bypass_heuristic_when_close_to_bufsize) { 5595 if (g_chunkSize != 0) { 5596 // this test does not use SINGLE_BYTES, because it depends on very precise 5597 // buffer fills. 5598 return; 5599 } 5600 if (! g_reparseDeferralEnabledDefault) { 5601 return; // this test is irrelevant when the deferral heuristic is disabled. 5602 } 5603 5604 const int document_length = 65536; 5605 char *const document = (char *)malloc(document_length); 5606 5607 const XML_Memory_Handling_Suite memfuncs = { 5608 counting_malloc, 5609 counting_realloc, 5610 free, 5611 }; 5612 5613 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; 5614 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; 5615 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; 5616 5617 for (const int *leading = leading_list; *leading >= 0; leading++) { 5618 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { 5619 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { 5620 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken, 5621 *fillsize); 5622 // start by checking that the test looks reasonably valid 5623 assert_true(*leading + *bigtoken <= document_length); 5624 5625 // put 'x' everywhere; some will be overwritten by elements. 5626 memset(document, 'x', document_length); 5627 // maybe add an initial tag 5628 if (*leading) { 5629 assert_true(*leading >= 3); // or the test case is invalid 5630 memcpy(document, "<a>", 3); 5631 } 5632 // add the large token 5633 document[*leading + 0] = '<'; 5634 document[*leading + 1] = 'b'; 5635 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token 5636 document[*leading + *bigtoken - 1] = '>'; 5637 5638 // 1 for 'b', plus 1 or 0 depending on the presence of 'a' 5639 const int expected_elem_total = 1 + (*leading ? 1 : 0); 5640 5641 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); 5642 assert_true(parser != NULL); 5643 5644 CharData storage; 5645 CharData_Init(&storage); 5646 XML_SetUserData(parser, &storage); 5647 XML_SetStartElementHandler(parser, start_element_event_handler); 5648 5649 g_biggestAlloc = 0; 5650 g_totalAlloc = 0; 5651 int offset = 0; 5652 // fill data until the big token is covered (but not necessarily parsed) 5653 while (offset < *leading + *bigtoken) { 5654 assert_true(offset + *fillsize <= document_length); 5655 const enum XML_Status status 5656 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5657 if (status != XML_STATUS_OK) { 5658 xml_failure(parser); 5659 } 5660 offset += *fillsize; 5661 } 5662 // Now, check that we've had a buffer allocation that could fit the 5663 // context bytes and our big token. In order to detect a special case, 5664 // we need to know how many bytes of our big token were included in the 5665 // first push that contained _any_ bytes of the big token: 5666 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); 5667 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { 5668 // Special case: we aren't saving any context, and the whole big token 5669 // was covered by a single fill, so Expat may have parsed directly 5670 // from our input pointer, without allocating an internal buffer. 5671 } else if (*leading < XML_CONTEXT_BYTES) { 5672 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); 5673 } else { 5674 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); 5675 } 5676 // fill data until the big token is actually parsed 5677 while (storage.count < expected_elem_total) { 5678 const size_t alloc_before = g_totalAlloc; 5679 assert_true(offset + *fillsize <= document_length); 5680 const enum XML_Status status 5681 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5682 if (status != XML_STATUS_OK) { 5683 xml_failure(parser); 5684 } 5685 offset += *fillsize; 5686 // since all the bytes of the big token are already in the buffer, 5687 // the bufsize ceiling should make us finish its parsing without any 5688 // further buffer allocations. We assume that there will be no other 5689 // large allocations in this test. 5690 assert_true(g_totalAlloc - alloc_before < 4096); 5691 } 5692 // test-the-test: was our alloc even called? 5693 assert_true(g_totalAlloc > 0); 5694 // test-the-test: there shouldn't be any extra start elements 5695 assert_true(storage.count == expected_elem_total); 5696 5697 XML_ParserFree(parser); 5698 } 5699 } 5700 } 5701 free(document); 5702 } 5703 END_TEST 5704 5705 START_TEST(test_varying_buffer_fills) { 5706 const int KiB = 1024; 5707 const int MiB = 1024 * KiB; 5708 const int document_length = 16 * MiB; 5709 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB 5710 5711 if (g_chunkSize != 0) { 5712 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES(). 5713 } 5714 5715 char *const document = (char *)malloc(document_length); 5716 assert_true(document != NULL); 5717 memset(document, 'x', document_length); 5718 document[0] = '<'; 5719 document[1] = 't'; 5720 memset(&document[2], ' ', big - 2); // a very spacy token 5721 document[big - 1] = '>'; 5722 5723 // Each testcase is a list of buffer fill sizes, terminated by a value < 0. 5724 // When reparse deferral is enabled, the final (negated) value is the expected 5725 // maximum number of bytes scanned in parse attempts. 5726 const int testcases[][30] = { 5727 {8 * MiB, -8 * MiB}, 5728 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total 5729 // zero-size fills shouldn't trigger the bypass 5730 {4 * MiB, 0, 4 * MiB, -12 * MiB}, 5731 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, 5732 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, 5733 // try to hit the buffer ceiling only once (at the end) 5734 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, 5735 // try to hit the same buffer ceiling multiple times 5736 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, 5737 5738 // try to hit every ceiling, by always landing 1K shy of the buffer size 5739 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, 5740 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, 5741 5742 // try to avoid every ceiling, by always landing 1B past the buffer size 5743 // the normal 2x heuristic threshold still forces parse attempts. 5744 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 5745 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 5746 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 5747 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 5748 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 5749 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 5750 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 5751 -(10 * MiB + 682 * KiB + 7)}, 5752 // try to avoid every ceiling again, except on our last fill. 5753 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 5754 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 5755 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 5756 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 5757 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 5758 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 5759 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 5760 -(10 * MiB + 682 * KiB + 6)}, 5761 5762 // try to hit ceilings on the way multiple times 5763 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer 5764 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer 5765 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer 5766 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer 5767 // we'll make a parse attempt at every parse call 5768 -(45 * MiB + 12)}, 5769 }; 5770 const int testcount = sizeof(testcases) / sizeof(testcases[0]); 5771 for (int test_i = 0; test_i < testcount; test_i++) { 5772 const int *fillsize = testcases[test_i]; 5773 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1], 5774 fillsize[2], fillsize[3]); 5775 XML_Parser parser = XML_ParserCreate(NULL); 5776 assert_true(parser != NULL); 5777 g_parseAttempts = 0; 5778 5779 CharData storage; 5780 CharData_Init(&storage); 5781 XML_SetUserData(parser, &storage); 5782 XML_SetStartElementHandler(parser, start_element_event_handler); 5783 5784 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) 5785 int scanned_bytes = 0; // sum of (buffered bytes at each actual parse) 5786 int offset = 0; 5787 while (*fillsize >= 0) { 5788 assert_true(offset + *fillsize <= document_length); // or test is invalid 5789 const unsigned attempts_before = g_parseAttempts; 5790 const enum XML_Status status 5791 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5792 if (status != XML_STATUS_OK) { 5793 xml_failure(parser); 5794 } 5795 offset += *fillsize; 5796 fillsize++; 5797 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow 5798 worstcase_bytes += offset; // we might've tried to parse all pending bytes 5799 if (g_parseAttempts != attempts_before) { 5800 assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse 5801 assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow 5802 scanned_bytes += offset; // we *did* try to parse all pending bytes 5803 } 5804 } 5805 assert_true(storage.count == 1); // the big token should've been parsed 5806 assert_true(scanned_bytes > 0); // test-the-test: does our counter work? 5807 if (g_reparseDeferralEnabledDefault) { 5808 // heuristic is enabled; some XML_Parse calls may have deferred reparsing 5809 const int max_bytes_scanned = -*fillsize; 5810 if (scanned_bytes > max_bytes_scanned) { 5811 fprintf(stderr, 5812 "bytes scanned in parse attempts: actual=%d limit=%d \n", 5813 scanned_bytes, max_bytes_scanned); 5814 fail("too many bytes scanned in parse attempts"); 5815 } 5816 assert_true(scanned_bytes <= worstcase_bytes); 5817 } else { 5818 // heuristic is disabled; every XML_Parse() will have reparsed 5819 assert_true(scanned_bytes == worstcase_bytes); 5820 } 5821 5822 XML_ParserFree(parser); 5823 } 5824 free(document); 5825 } 5826 END_TEST 5827 5828 void 5829 make_basic_test_case(Suite *s) { 5830 TCase *tc_basic = tcase_create("basic tests"); 5831 5832 suite_add_tcase(s, tc_basic); 5833 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 5834 5835 tcase_add_test(tc_basic, test_nul_byte); 5836 tcase_add_test(tc_basic, test_u0000_char); 5837 tcase_add_test(tc_basic, test_siphash_self); 5838 tcase_add_test(tc_basic, test_siphash_spec); 5839 tcase_add_test(tc_basic, test_bom_utf8); 5840 tcase_add_test(tc_basic, test_bom_utf16_be); 5841 tcase_add_test(tc_basic, test_bom_utf16_le); 5842 tcase_add_test(tc_basic, test_nobom_utf16_le); 5843 tcase_add_test(tc_basic, test_hash_collision); 5844 tcase_add_test(tc_basic, test_illegal_utf8); 5845 tcase_add_test(tc_basic, test_utf8_auto_align); 5846 tcase_add_test(tc_basic, test_utf16); 5847 tcase_add_test(tc_basic, test_utf16_le_epilog_newline); 5848 tcase_add_test(tc_basic, test_not_utf16); 5849 tcase_add_test(tc_basic, test_bad_encoding); 5850 tcase_add_test(tc_basic, test_latin1_umlauts); 5851 tcase_add_test(tc_basic, test_long_utf8_character); 5852 tcase_add_test(tc_basic, test_long_latin1_attribute); 5853 tcase_add_test(tc_basic, test_long_ascii_attribute); 5854 /* Regression test for SF bug #491986. */ 5855 tcase_add_test(tc_basic, test_danish_latin1); 5856 /* Regression test for SF bug #514281. */ 5857 tcase_add_test(tc_basic, test_french_charref_hexidecimal); 5858 tcase_add_test(tc_basic, test_french_charref_decimal); 5859 tcase_add_test(tc_basic, test_french_latin1); 5860 tcase_add_test(tc_basic, test_french_utf8); 5861 tcase_add_test(tc_basic, test_utf8_false_rejection); 5862 tcase_add_test(tc_basic, test_line_number_after_parse); 5863 tcase_add_test(tc_basic, test_column_number_after_parse); 5864 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); 5865 tcase_add_test(tc_basic, test_line_number_after_error); 5866 tcase_add_test(tc_basic, test_column_number_after_error); 5867 tcase_add_test(tc_basic, test_really_long_lines); 5868 tcase_add_test(tc_basic, test_really_long_encoded_lines); 5869 tcase_add_test(tc_basic, test_end_element_events); 5870 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized); 5871 tcase_add_test(tc_basic, test_attr_whitespace_normalization); 5872 tcase_add_test(tc_basic, test_xmldecl_misplaced); 5873 tcase_add_test(tc_basic, test_xmldecl_invalid); 5874 tcase_add_test(tc_basic, test_xmldecl_missing_attr); 5875 tcase_add_test(tc_basic, test_xmldecl_missing_value); 5876 tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity); 5877 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); 5878 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); 5879 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); 5880 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); 5881 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding); 5882 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2); 5883 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset); 5884 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset); 5885 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); 5886 tcase_add_test(tc_basic, 5887 test_wfc_undeclared_entity_with_external_subset_standalone); 5888 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); 5889 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); 5890 tcase_add_test(tc_basic, test_not_standalone_handler_reject); 5891 tcase_add_test(tc_basic, test_not_standalone_handler_accept); 5892 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs); 5893 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse); 5894 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling); 5895 tcase_add_test(tc_basic, test_dtd_attr_handling); 5896 tcase_add_test(tc_basic, test_empty_ns_without_namespaces); 5897 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); 5898 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); 5899 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); 5900 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); 5901 tcase_add_test(tc_basic, test_good_cdata_ascii); 5902 tcase_add_test(tc_basic, test_good_cdata_utf16); 5903 tcase_add_test(tc_basic, test_good_cdata_utf16_le); 5904 tcase_add_test(tc_basic, test_long_cdata_utf16); 5905 tcase_add_test(tc_basic, test_multichar_cdata_utf16); 5906 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); 5907 tcase_add_test(tc_basic, test_bad_cdata); 5908 tcase_add_test(tc_basic, test_bad_cdata_utf16); 5909 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); 5910 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); 5911 tcase_add_test(tc_basic, test_memory_allocation); 5912 tcase_add_test__if_xml_ge(tc_basic, test_default_current); 5913 tcase_add_test(tc_basic, test_dtd_elements); 5914 tcase_add_test(tc_basic, test_dtd_elements_nesting); 5915 tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); 5916 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); 5917 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); 5918 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype); 5919 tcase_add_test__ifdef_xml_dtd(tc_basic, 5920 test_foreign_dtd_without_external_subset); 5921 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd); 5922 tcase_add_test(tc_basic, test_set_base); 5923 tcase_add_test(tc_basic, test_attributes); 5924 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity); 5925 tcase_add_test(tc_basic, test_resume_invalid_parse); 5926 tcase_add_test(tc_basic, test_resume_resuspended); 5927 tcase_add_test(tc_basic, test_cdata_default); 5928 tcase_add_test(tc_basic, test_subordinate_reset); 5929 tcase_add_test(tc_basic, test_subordinate_suspend); 5930 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend); 5931 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort); 5932 tcase_add_test__ifdef_xml_dtd(tc_basic, 5933 test_ext_entity_invalid_suspended_parse); 5934 tcase_add_test(tc_basic, test_explicit_encoding); 5935 tcase_add_test(tc_basic, test_trailing_cr); 5936 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr); 5937 tcase_add_test(tc_basic, test_trailing_rsqb); 5938 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb); 5939 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata); 5940 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); 5941 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); 5942 tcase_add_test(tc_basic, test_empty_parse); 5943 tcase_add_test(tc_basic, test_get_buffer_1); 5944 tcase_add_test(tc_basic, test_get_buffer_2); 5945 #if XML_CONTEXT_BYTES > 0 5946 tcase_add_test(tc_basic, test_get_buffer_3_overflow); 5947 #endif 5948 tcase_add_test(tc_basic, test_buffer_can_grow_to_max); 5949 tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); 5950 tcase_add_test(tc_basic, test_byte_info_at_end); 5951 tcase_add_test(tc_basic, test_byte_info_at_error); 5952 tcase_add_test(tc_basic, test_byte_info_at_cdata); 5953 tcase_add_test(tc_basic, test_predefined_entities); 5954 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd); 5955 tcase_add_test(tc_basic, test_not_predefined_entities); 5956 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section); 5957 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); 5958 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); 5959 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); 5960 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); 5961 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); 5962 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); 5963 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); 5964 tcase_add_test(tc_basic, test_bad_public_doctype); 5965 tcase_add_test(tc_basic, test_attribute_enum_value); 5966 tcase_add_test(tc_basic, test_predefined_entity_redefinition); 5967 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing); 5968 tcase_add_test(tc_basic, test_public_notation_no_sysid); 5969 tcase_add_test(tc_basic, test_nested_groups); 5970 tcase_add_test(tc_basic, test_group_choice); 5971 tcase_add_test(tc_basic, test_standalone_parameter_entity); 5972 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); 5973 tcase_add_test__ifdef_xml_dtd(tc_basic, 5974 test_recursive_external_parameter_entity); 5975 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); 5976 tcase_add_test(tc_basic, test_suspend_xdecl); 5977 tcase_add_test(tc_basic, test_abort_epilog); 5978 tcase_add_test(tc_basic, test_abort_epilog_2); 5979 tcase_add_test(tc_basic, test_suspend_epilog); 5980 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag); 5981 tcase_add_test(tc_basic, test_unfinished_epilog); 5982 tcase_add_test(tc_basic, test_partial_char_in_epilog); 5983 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity); 5984 tcase_add_test__ifdef_xml_dtd(tc_basic, 5985 test_suspend_resume_internal_entity_issue_629); 5986 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error); 5987 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity); 5988 tcase_add_test(tc_basic, test_restart_on_error); 5989 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); 5990 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); 5991 tcase_add_test(tc_basic, test_trailing_cr_in_att_value); 5992 tcase_add_test(tc_basic, test_standalone_internal_entity); 5993 tcase_add_test(tc_basic, test_skipped_external_entity); 5994 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); 5995 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); 5996 tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr); 5997 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity); 5998 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2); 5999 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3); 6000 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4); 6001 tcase_add_test(tc_basic, test_pi_handled_in_default); 6002 tcase_add_test(tc_basic, test_comment_handled_in_default); 6003 tcase_add_test(tc_basic, test_pi_yml); 6004 tcase_add_test(tc_basic, test_pi_xnl); 6005 tcase_add_test(tc_basic, test_pi_xmm); 6006 tcase_add_test(tc_basic, test_utf16_pi); 6007 tcase_add_test(tc_basic, test_utf16_be_pi); 6008 tcase_add_test(tc_basic, test_utf16_be_comment); 6009 tcase_add_test(tc_basic, test_utf16_le_comment); 6010 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); 6011 tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); 6012 tcase_add_test(tc_basic, test_unknown_encoding_success); 6013 tcase_add_test(tc_basic, test_unknown_encoding_bad_name); 6014 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); 6015 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); 6016 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); 6017 tcase_add_test(tc_basic, test_invalid_unknown_encoding); 6018 tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); 6019 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); 6020 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); 6021 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); 6022 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); 6023 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); 6024 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); 6025 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom); 6026 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); 6027 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); 6028 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2); 6029 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be); 6030 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le); 6031 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown); 6032 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom); 6033 tcase_add_test(tc_basic, test_utf8_in_cdata_section); 6034 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); 6035 tcase_add_test(tc_basic, test_utf8_in_start_tags); 6036 tcase_add_test(tc_basic, test_trailing_spaces_in_elements); 6037 tcase_add_test(tc_basic, test_utf16_attribute); 6038 tcase_add_test(tc_basic, test_utf16_second_attr); 6039 tcase_add_test(tc_basic, test_attr_after_solidus); 6040 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe); 6041 tcase_add_test(tc_basic, test_bad_attr_desc_keyword); 6042 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); 6043 tcase_add_test(tc_basic, test_bad_doctype); 6044 tcase_add_test(tc_basic, test_bad_doctype_utf8); 6045 tcase_add_test(tc_basic, test_bad_doctype_utf16); 6046 tcase_add_test(tc_basic, test_bad_doctype_plus); 6047 tcase_add_test(tc_basic, test_bad_doctype_star); 6048 tcase_add_test(tc_basic, test_bad_doctype_query); 6049 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore); 6050 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); 6051 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); 6052 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be); 6053 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le); 6054 tcase_add_test(tc_basic, test_short_doctype); 6055 tcase_add_test(tc_basic, test_short_doctype_2); 6056 tcase_add_test(tc_basic, test_short_doctype_3); 6057 tcase_add_test(tc_basic, test_long_doctype); 6058 tcase_add_test(tc_basic, test_bad_entity); 6059 tcase_add_test(tc_basic, test_bad_entity_2); 6060 tcase_add_test(tc_basic, test_bad_entity_3); 6061 tcase_add_test(tc_basic, test_bad_entity_4); 6062 tcase_add_test(tc_basic, test_bad_notation); 6063 tcase_add_test(tc_basic, test_default_doctype_handler); 6064 tcase_add_test(tc_basic, test_empty_element_abort); 6065 tcase_add_test__ifdef_xml_dtd(tc_basic, 6066 test_pool_integrity_with_unfinished_attr); 6067 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); 6068 tcase_add_test(tc_basic, test_big_tokens_take_linear_time); 6069 tcase_add_test(tc_basic, test_set_reparse_deferral); 6070 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); 6071 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); 6072 tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); 6073 tcase_add_test(tc_basic, test_set_bad_reparse_option); 6074 tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); 6075 tcase_add_test(tc_basic, test_varying_buffer_fills); 6076 } 6077