1 /* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Copyright (c) 2024-2026 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23 Copyright (c) 2026 Francesco Bertolaccini
24 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez@gmail.com>
25 Licensed under the MIT license:
26
27 Permission is hereby granted, free of charge, to any person obtaining
28 a copy of this software and associated documentation files (the
29 "Software"), to deal in the Software without restriction, including
30 without limitation the rights to use, copy, modify, merge, publish,
31 distribute, sublicense, and/or sell copies of the Software, and to permit
32 persons to whom the Software is furnished to do so, subject to the
33 following conditions:
34
35 The above copyright notice and this permission notice shall be included
36 in all copies or substantial portions of the Software.
37
38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
39 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
40 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
41 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
42 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
43 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
44 USE OR OTHER DEALINGS IN THE SOFTWARE.
45 */
46
47 #if defined(NDEBUG)
48 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
49 #endif
50
51 #include <assert.h>
52
53 #include <stdio.h>
54 #include <string.h>
55 #include <time.h>
56
57 #if ! defined(__cplusplus)
58 # include <stdbool.h>
59 #endif
60
61 #include "expat_config.h"
62
63 #include "expat.h"
64 #include "internal.h"
65 #include "minicheck.h"
66 #include "structdata.h"
67 #include "common.h"
68 #include "dummy.h"
69 #include "handlers.h"
70 #include "siphash.h"
71 #include "basic_tests.h"
72
73 static void
basic_setup(void)74 basic_setup(void) {
75 g_parser = XML_ParserCreate(NULL);
76 if (g_parser == NULL)
77 fail("Parser not created.");
78 }
79
80 /*
81 * Character & encoding tests.
82 */
83
START_TEST(test_nul_byte)84 START_TEST(test_nul_byte) {
85 char text[] = "<doc>\0</doc>";
86
87 /* test that a NUL byte (in US-ASCII data) is an error */
88 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
89 == XML_STATUS_OK)
90 fail("Parser did not report error on NUL-byte.");
91 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
92 xml_failure(g_parser);
93 }
94 END_TEST
95
START_TEST(test_u0000_char)96 START_TEST(test_u0000_char) {
97 /* test that a NUL byte (in US-ASCII data) is an error */
98 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF,
99 "Parser did not report error on NUL-byte.");
100 }
101 END_TEST
102
START_TEST(test_siphash_self)103 START_TEST(test_siphash_self) {
104 if (! sip24_valid())
105 fail("SipHash self-test failed");
106 }
107 END_TEST
108
START_TEST(test_siphash_spec)109 START_TEST(test_siphash_spec) {
110 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
111 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
112 "\x0a\x0b\x0c\x0d\x0e";
113 const size_t len = sizeof(message) - 1;
114 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
115 struct siphash state;
116 struct sipkey key;
117
118 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
119 "\x0a\x0b\x0c\x0d\x0e\x0f");
120 sip24_init(&state, &key);
121
122 /* Cover spread across calls */
123 sip24_update(&state, message, 4);
124 sip24_update(&state, message + 4, len - 4);
125
126 /* Cover null length */
127 sip24_update(&state, message, 0);
128
129 if (sip24_final(&state) != expected)
130 fail("sip24_final failed spec test\n");
131
132 /* Cover wrapper */
133 if (siphash24(message, len, &key) != expected)
134 fail("siphash24 failed spec test\n");
135 }
136 END_TEST
137
START_TEST(test_bom_utf8)138 START_TEST(test_bom_utf8) {
139 /* This test is really just making sure we don't core on a UTF-8 BOM. */
140 const char *text = "\357\273\277<e/>";
141
142 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
143 == XML_STATUS_ERROR)
144 xml_failure(g_parser);
145 }
146 END_TEST
147
START_TEST(test_bom_utf16_be)148 START_TEST(test_bom_utf16_be) {
149 char text[] = "\376\377\0<\0e\0/\0>";
150
151 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
152 == XML_STATUS_ERROR)
153 xml_failure(g_parser);
154 }
155 END_TEST
156
START_TEST(test_bom_utf16_le)157 START_TEST(test_bom_utf16_le) {
158 char text[] = "\377\376<\0e\0/\0>\0";
159
160 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
161 == XML_STATUS_ERROR)
162 xml_failure(g_parser);
163 }
164 END_TEST
165
START_TEST(test_nobom_utf16_le)166 START_TEST(test_nobom_utf16_le) {
167 char text[] = " \0<\0e\0/\0>\0";
168
169 if (g_chunkSize == 1) {
170 // TODO: with just the first byte, we can't tell the difference between
171 // UTF-16-LE and UTF-8. Avoid the failure for now.
172 return;
173 }
174
175 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
176 == XML_STATUS_ERROR)
177 xml_failure(g_parser);
178 }
179 END_TEST
180
START_TEST(test_hash_collision)181 START_TEST(test_hash_collision) {
182 /* For full coverage of the lookup routine, we need to ensure a
183 * hash collision even though we can only tell that we have one
184 * through breakpoint debugging or coverage statistics. The
185 * following will cause a hash collision on machines with a 64-bit
186 * long type; others will have to experiment. The full coverage
187 * tests invoked from qa.sh usually provide a hash collision, but
188 * not always. This is an attempt to provide insurance.
189 */
190 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
191 const char *text
192 = "<doc>\n"
193 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
194 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
195 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
196 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
197 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
198 "<d8>This triggers the table growth and collides with b2</d8>\n"
199 "</doc>\n";
200
201 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
202 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
203 == XML_STATUS_ERROR)
204 xml_failure(g_parser);
205 }
206 END_TEST
207 #undef COLLIDING_HASH_SALT
208
START_TEST(test_hash_salt_setter)209 START_TEST(test_hash_salt_setter) {
210 const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
211 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
212 XML_Parser parser = XML_ParserCreate(NULL);
213
214 // NULL parser should be rejected
215 assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE);
216
217 // NULL entropy should be rejected
218 assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE);
219
220 // Setting should be allowed more than once
221 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
222 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
223
224 // But not after parsing has started
225 assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */)
226 == XML_STATUS_OK);
227 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE);
228
229 XML_ParserFree(parser);
230 }
231 END_TEST
232
233 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)234 START_TEST(test_danish_latin1) {
235 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
236 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
237 #ifdef XML_UNICODE
238 const XML_Char *expected
239 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
240 #else
241 const XML_Char *expected
242 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
243 #endif
244 run_character_check(text, expected);
245 }
246 END_TEST
247
248 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)249 START_TEST(test_french_charref_hexidecimal) {
250 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
251 "<doc>éèàçêÈ</doc>";
252 #ifdef XML_UNICODE
253 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
254 #else
255 const XML_Char *expected
256 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
257 #endif
258 run_character_check(text, expected);
259 }
260 END_TEST
261
START_TEST(test_french_charref_decimal)262 START_TEST(test_french_charref_decimal) {
263 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
264 "<doc>éèàçêÈ</doc>";
265 #ifdef XML_UNICODE
266 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
267 #else
268 const XML_Char *expected
269 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
270 #endif
271 run_character_check(text, expected);
272 }
273 END_TEST
274
START_TEST(test_french_latin1)275 START_TEST(test_french_latin1) {
276 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
277 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
278 #ifdef XML_UNICODE
279 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
280 #else
281 const XML_Char *expected
282 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
283 #endif
284 run_character_check(text, expected);
285 }
286 END_TEST
287
START_TEST(test_french_utf8)288 START_TEST(test_french_utf8) {
289 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
290 "<doc>\xC3\xA9</doc>";
291 #ifdef XML_UNICODE
292 const XML_Char *expected = XCS("\x00e9");
293 #else
294 const XML_Char *expected = XCS("\xC3\xA9");
295 #endif
296 run_character_check(text, expected);
297 }
298 END_TEST
299
300 /* Regression test for SF bug #600479.
301 XXX There should be a test that exercises all legal XML Unicode
302 characters as PCDATA and attribute value content, and XML Name
303 characters as part of element and attribute names.
304 */
START_TEST(test_utf8_false_rejection)305 START_TEST(test_utf8_false_rejection) {
306 const char *text = "<doc>\xEF\xBA\xBF</doc>";
307 #ifdef XML_UNICODE
308 const XML_Char *expected = XCS("\xfebf");
309 #else
310 const XML_Char *expected = XCS("\xEF\xBA\xBF");
311 #endif
312 run_character_check(text, expected);
313 }
314 END_TEST
315
316 /* Regression test for SF bug #477667.
317 This test assures that any 8-bit character followed by a 7-bit
318 character will not be mistakenly interpreted as a valid UTF-8
319 sequence.
320 */
START_TEST(test_illegal_utf8)321 START_TEST(test_illegal_utf8) {
322 char text[100];
323 int i;
324
325 for (i = 128; i <= 255; ++i) {
326 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
327 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
328 == XML_STATUS_OK) {
329 snprintf(text, sizeof(text),
330 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
331 i);
332 fail(text);
333 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
334 xml_failure(g_parser);
335 /* Reset the parser since we use the same parser repeatedly. */
336 XML_ParserReset(g_parser, NULL);
337 }
338 }
339 END_TEST
340
341 /* Examples, not masks: */
342 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
343 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
344 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
345 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
346 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
347
START_TEST(test_utf8_auto_align)348 START_TEST(test_utf8_auto_align) {
349 struct TestCase {
350 ptrdiff_t expectedMovementInChars;
351 const char *input;
352 };
353
354 struct TestCase cases[] = {
355 {00, ""},
356
357 {00, UTF8_LEAD_1},
358
359 {-1, UTF8_LEAD_2},
360 {00, UTF8_LEAD_2 UTF8_FOLLOW},
361
362 {-1, UTF8_LEAD_3},
363 {-2, UTF8_LEAD_3 UTF8_FOLLOW},
364 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
365
366 {-1, UTF8_LEAD_4},
367 {-2, UTF8_LEAD_4 UTF8_FOLLOW},
368 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
369 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
370 };
371
372 size_t i = 0;
373 bool success = true;
374 for (; i < sizeof(cases) / sizeof(*cases); i++) {
375 const char *fromLim = cases[i].input + strlen(cases[i].input);
376 const char *const fromLimInitially = fromLim;
377 ptrdiff_t actualMovementInChars;
378
379 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
380
381 actualMovementInChars = (fromLim - fromLimInitially);
382 if (actualMovementInChars != cases[i].expectedMovementInChars) {
383 size_t j = 0;
384 success = false;
385 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
386 ", actually moved by %2d chars: \"",
387 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
388 (int)actualMovementInChars);
389 for (; j < strlen(cases[i].input); j++) {
390 printf("\\x%02x", (unsigned char)cases[i].input[j]);
391 }
392 printf("\"\n");
393 }
394 }
395
396 if (! success) {
397 fail("UTF-8 auto-alignment is not bullet-proof\n");
398 }
399 }
400 END_TEST
401
START_TEST(test_utf16)402 START_TEST(test_utf16) {
403 /* <?xml version="1.0" encoding="UTF-16"?>
404 * <doc a='123'>some {A} text</doc>
405 *
406 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
407 */
408 char text[]
409 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
410 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
411 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
412 "\000'\000?\000>\000\n"
413 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
414 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
415 "<\000/\000d\000o\000c\000>";
416 #ifdef XML_UNICODE
417 const XML_Char *expected = XCS("some \xff21 text");
418 #else
419 const XML_Char *expected = XCS("some \357\274\241 text");
420 #endif
421 CharData storage;
422
423 CharData_Init(&storage);
424 XML_SetUserData(g_parser, &storage);
425 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
426 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
427 == XML_STATUS_ERROR)
428 xml_failure(g_parser);
429 CharData_CheckXMLChars(&storage, expected);
430 }
431 END_TEST
432
START_TEST(test_utf16_le_epilog_newline)433 START_TEST(test_utf16_le_epilog_newline) {
434 unsigned int first_chunk_bytes = 17;
435 char text[] = "\xFF\xFE" /* BOM */
436 "<\000e\000/\000>\000" /* document element */
437 "\r\000\n\000\r\000\n\000"; /* epilog */
438
439 if (first_chunk_bytes >= sizeof(text) - 1)
440 fail("bad value of first_chunk_bytes");
441 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE)
442 == XML_STATUS_ERROR)
443 xml_failure(g_parser);
444 else {
445 enum XML_Status rc;
446 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
447 (int)(sizeof(text) - first_chunk_bytes - 1),
448 XML_TRUE);
449 if (rc == XML_STATUS_ERROR)
450 xml_failure(g_parser);
451 }
452 }
453 END_TEST
454
455 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)456 START_TEST(test_not_utf16) {
457 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
458 "<doc>Hi</doc>";
459
460 /* Use a handler to provoke the appropriate code paths */
461 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
462 expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
463 "UTF-16 declared in UTF-8 not faulted");
464 }
465 END_TEST
466
467 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)468 START_TEST(test_bad_encoding) {
469 const char *text = "<doc>Hi</doc>";
470
471 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
472 fail("XML_SetEncoding failed");
473 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
474 "Unknown encoding not faulted");
475 }
476 END_TEST
477
478 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)479 START_TEST(test_latin1_umlauts) {
480 const char *text
481 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
482 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n"
483 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>";
484 #ifdef XML_UNICODE
485 /* Expected results in UTF-16 */
486 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
487 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
488 #else
489 /* Expected results in UTF-8 */
490 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
491 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
492 #endif
493
494 run_character_check(text, expected);
495 XML_ParserReset(g_parser, NULL);
496 run_attribute_check(text, expected);
497 /* Repeat with a default handler */
498 XML_ParserReset(g_parser, NULL);
499 XML_SetDefaultHandler(g_parser, dummy_default_handler);
500 run_character_check(text, expected);
501 XML_ParserReset(g_parser, NULL);
502 XML_SetDefaultHandler(g_parser, dummy_default_handler);
503 run_attribute_check(text, expected);
504 }
505 END_TEST
506
507 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)508 START_TEST(test_long_utf8_character) {
509 const char *text
510 = "<?xml version='1.0' encoding='utf-8'?>\n"
511 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
512 "<do\xf0\x90\x80\x80/>";
513 expect_failure(text, XML_ERROR_INVALID_TOKEN,
514 "4-byte UTF-8 character in element name not faulted");
515 }
516 END_TEST
517
518 /* Test that a long latin-1 attribute (too long to convert in one go)
519 * is correctly converted
520 */
START_TEST(test_long_latin1_attribute)521 START_TEST(test_long_latin1_attribute) {
522 const char *text
523 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
524 "<doc att='"
525 /* 64 characters per line */
526 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
527 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
528 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
529 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
530 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
531 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
532 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
533 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
534 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
535 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
536 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
537 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
538 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
539 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
540 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
541 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
542 /* Last character splits across a buffer boundary */
543 "\xe4'>\n</doc>";
544
545 const XML_Char *expected =
546 /* 64 characters per line */
547 /* clang-format off */
548 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
549 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
550 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
551 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
552 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
553 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
554 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
555 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
556 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
557 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
558 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
559 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
560 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
561 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
562 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
563 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
564 /* clang-format on */
565 #ifdef XML_UNICODE
566 XCS("\x00e4");
567 #else
568 XCS("\xc3\xa4");
569 #endif
570
571 run_attribute_check(text, expected);
572 }
573 END_TEST
574
575 /* Test that a long ASCII attribute (too long to convert in one go)
576 * is correctly converted
577 */
START_TEST(test_long_ascii_attribute)578 START_TEST(test_long_ascii_attribute) {
579 const char *text
580 = "<?xml version='1.0' encoding='us-ascii'?>\n"
581 "<doc att='"
582 /* 64 characters per line */
583 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
584 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
585 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
586 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
587 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
588 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
589 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
590 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
591 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
592 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
593 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
594 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
595 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
596 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
597 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
598 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
599 "01234'>\n</doc>";
600 const XML_Char *expected =
601 /* 64 characters per line */
602 /* clang-format off */
603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
604 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
605 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
606 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
607 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
608 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
609 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
610 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
611 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
612 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
613 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
614 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
615 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
616 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
617 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
618 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
619 XCS("01234");
620 /* clang-format on */
621
622 run_attribute_check(text, expected);
623 }
624 END_TEST
625
626 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)627 START_TEST(test_line_number_after_parse) {
628 const char *text = "<tag>\n"
629 "\n"
630 "\n</tag>";
631 XML_Size lineno;
632
633 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
634 == XML_STATUS_ERROR)
635 xml_failure(g_parser);
636 lineno = XML_GetCurrentLineNumber(g_parser);
637 if (lineno != 4) {
638 char buffer[100];
639 snprintf(buffer, sizeof(buffer),
640 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
641 fail(buffer);
642 }
643 }
644 END_TEST
645
646 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)647 START_TEST(test_column_number_after_parse) {
648 const char *text = "<tag></tag>";
649 XML_Size colno;
650
651 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
652 == XML_STATUS_ERROR)
653 xml_failure(g_parser);
654 colno = XML_GetCurrentColumnNumber(g_parser);
655 if (colno != 11) {
656 char buffer[100];
657 snprintf(buffer, sizeof(buffer),
658 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
659 fail(buffer);
660 }
661 }
662 END_TEST
663
664 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)665 START_TEST(test_line_and_column_numbers_inside_handlers) {
666 const char *text = "<a>\n" /* Unix end-of-line */
667 " <b>\r\n" /* Windows end-of-line */
668 " <c/>\r" /* Mac OS end-of-line */
669 " </b>\n"
670 " <d>\n"
671 " <f/>\n"
672 " </d>\n"
673 "</a>";
674 const StructDataEntry expected[]
675 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
676 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
677 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
678 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
679 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
680 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
681 StructData storage;
682
683 StructData_Init(&storage);
684 XML_SetUserData(g_parser, &storage);
685 XML_SetStartElementHandler(g_parser, start_element_event_handler2);
686 XML_SetEndElementHandler(g_parser, end_element_event_handler2);
687 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
688 == XML_STATUS_ERROR)
689 xml_failure(g_parser);
690
691 StructData_CheckItems(&storage, expected, expected_count);
692 StructData_Dispose(&storage);
693 }
694 END_TEST
695
696 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)697 START_TEST(test_line_number_after_error) {
698 const char *text = "<a>\n"
699 " <b>\n"
700 " </a>"; /* missing </b> */
701 XML_Size lineno;
702 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
703 != XML_STATUS_ERROR)
704 fail("Expected a parse error");
705
706 lineno = XML_GetCurrentLineNumber(g_parser);
707 if (lineno != 3) {
708 char buffer[100];
709 snprintf(buffer, sizeof(buffer),
710 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
711 fail(buffer);
712 }
713 }
714 END_TEST
715
716 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)717 START_TEST(test_column_number_after_error) {
718 const char *text = "<a>\n"
719 " <b>\n"
720 " </a>"; /* missing </b> */
721 XML_Size colno;
722 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
723 != XML_STATUS_ERROR)
724 fail("Expected a parse error");
725
726 colno = XML_GetCurrentColumnNumber(g_parser);
727 if (colno != 4) {
728 char buffer[100];
729 snprintf(buffer, sizeof(buffer),
730 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
731 fail(buffer);
732 }
733 }
734 END_TEST
735
736 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)737 START_TEST(test_really_long_lines) {
738 /* This parses an input line longer than INIT_DATA_BUF_SIZE
739 characters long (defined to be 1024 in xmlparse.c). We take a
740 really cheesy approach to building the input buffer, because
741 this avoids writing bugs in buffer-filling code.
742 */
743 const char *text
744 = "<e>"
745 /* 64 chars */
746 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
747 /* until we have at least 1024 characters on the line: */
748 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
749 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
750 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
751 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
752 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
753 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
754 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764 "</e>";
765 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
766 == XML_STATUS_ERROR)
767 xml_failure(g_parser);
768 }
769 END_TEST
770
771 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)772 START_TEST(test_really_long_encoded_lines) {
773 /* As above, except that we want to provoke an output buffer
774 * overflow with a non-trivial encoding. For this we need to pass
775 * the whole cdata in one go, not byte-by-byte.
776 */
777 void *buffer;
778 const char *text
779 = "<?xml version='1.0' encoding='iso-8859-1'?>"
780 "<e>"
781 /* 64 chars */
782 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
783 /* until we have at least 1024 characters on the line: */
784 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
785 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
786 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
787 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
788 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
789 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
790 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
791 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
792 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
793 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
794 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
795 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
796 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
797 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
798 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
799 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
800 "</e>";
801 int parse_len = (int)strlen(text);
802
803 /* Need a cdata handler to provoke the code path we want to test */
804 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
805 buffer = XML_GetBuffer(g_parser, parse_len);
806 if (buffer == NULL)
807 fail("Could not allocate parse buffer");
808 assert(buffer != NULL);
809 memcpy(buffer, text, parse_len);
810 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
811 xml_failure(g_parser);
812 }
813 END_TEST
814
815 /*
816 * Element event tests.
817 */
818
START_TEST(test_end_element_events)819 START_TEST(test_end_element_events) {
820 const char *text = "<a><b><c/></b><d><f/></d></a>";
821 const XML_Char *expected = XCS("/c/b/f/d/a");
822 CharData storage;
823
824 CharData_Init(&storage);
825 XML_SetUserData(g_parser, &storage);
826 XML_SetEndElementHandler(g_parser, end_element_event_handler);
827 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
828 == XML_STATUS_ERROR)
829 xml_failure(g_parser);
830 CharData_CheckXMLChars(&storage, expected);
831 }
832 END_TEST
833
834 /*
835 * Attribute tests.
836 */
837
838 /* Helper used by the following tests; this checks any "attr" and "refs"
839 attributes to make sure whitespace has been normalized.
840
841 Return true if whitespace has been normalized in a string, using
842 the rules for attribute value normalization. The 'is_cdata' flag
843 is needed since CDATA attributes don't need to have multiple
844 whitespace characters collapsed to a single space, while other
845 attribute data types do. (Section 3.3.3 of the recommendation.)
846 */
847 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)848 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
849 int blanks = 0;
850 int at_start = 1;
851 while (*s) {
852 if (*s == XCS(' '))
853 ++blanks;
854 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
855 return 0;
856 else {
857 if (at_start) {
858 at_start = 0;
859 if (blanks && ! is_cdata)
860 /* illegal leading blanks */
861 return 0;
862 } else if (blanks > 1 && ! is_cdata)
863 return 0;
864 blanks = 0;
865 }
866 ++s;
867 }
868 if (blanks && ! is_cdata)
869 return 0;
870 return 1;
871 }
872
873 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)874 START_TEST(test_helper_is_whitespace_normalized) {
875 assert(is_whitespace_normalized(XCS("abc"), 0));
876 assert(is_whitespace_normalized(XCS("abc"), 1));
877 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
878 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
879 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
880 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
881 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
882 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
883 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
884 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
885 assert(! is_whitespace_normalized(XCS(" "), 0));
886 assert(is_whitespace_normalized(XCS(" "), 1));
887 assert(! is_whitespace_normalized(XCS("\t"), 0));
888 assert(! is_whitespace_normalized(XCS("\t"), 1));
889 assert(! is_whitespace_normalized(XCS("\n"), 0));
890 assert(! is_whitespace_normalized(XCS("\n"), 1));
891 assert(! is_whitespace_normalized(XCS("\r"), 0));
892 assert(! is_whitespace_normalized(XCS("\r"), 1));
893 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
894 }
895 END_TEST
896
897 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)898 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
899 const XML_Char **atts) {
900 int i;
901 UNUSED_P(userData);
902 UNUSED_P(name);
903 for (i = 0; atts[i] != NULL; i += 2) {
904 const XML_Char *attrname = atts[i];
905 const XML_Char *value = atts[i + 1];
906 if (xcstrcmp(XCS("attr"), attrname) == 0
907 || xcstrcmp(XCS("ents"), attrname) == 0
908 || xcstrcmp(XCS("refs"), attrname) == 0) {
909 if (! is_whitespace_normalized(value, 0)) {
910 char buffer[256];
911 snprintf(buffer, sizeof(buffer),
912 "attribute value not normalized: %" XML_FMT_STR
913 "='%" XML_FMT_STR "'",
914 attrname, value);
915 fail(buffer);
916 }
917 }
918 }
919 }
920
START_TEST(test_attr_whitespace_normalization)921 START_TEST(test_attr_whitespace_normalization) {
922 const char *text
923 = "<!DOCTYPE doc [\n"
924 " <!ATTLIST doc\n"
925 " attr NMTOKENS #REQUIRED\n"
926 " ents ENTITIES #REQUIRED\n"
927 " refs IDREFS #REQUIRED>\n"
928 "]>\n"
929 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
930 " ents=' ent-1 \t\r\n"
931 " ent-2 ' >\n"
932 " <e id='id-1'/>\n"
933 " <e id='id-2'/>\n"
934 "</doc>";
935
936 XML_SetStartElementHandler(g_parser,
937 check_attr_contains_normalized_whitespace);
938 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
939 == XML_STATUS_ERROR)
940 xml_failure(g_parser);
941 }
942 END_TEST
943
944 /*
945 * XML declaration tests.
946 */
947
START_TEST(test_xmldecl_misplaced)948 START_TEST(test_xmldecl_misplaced) {
949 expect_failure("\n"
950 "<?xml version='1.0'?>\n"
951 "<a/>",
952 XML_ERROR_MISPLACED_XML_PI,
953 "failed to report misplaced XML declaration");
954 }
955 END_TEST
956
START_TEST(test_xmldecl_invalid)957 START_TEST(test_xmldecl_invalid) {
958 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
959 "Failed to report invalid XML declaration");
960 }
961 END_TEST
962
START_TEST(test_xmldecl_missing_attr)963 START_TEST(test_xmldecl_missing_attr) {
964 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
965 "Failed to report missing XML declaration attribute");
966 }
967 END_TEST
968
START_TEST(test_xmldecl_missing_value)969 START_TEST(test_xmldecl_missing_value) {
970 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
971 "<doc/>",
972 XML_ERROR_XML_DECL,
973 "Failed to report missing attribute value");
974 }
975 END_TEST
976
977 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)978 START_TEST(test_unknown_encoding_internal_entity) {
979 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
980 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
981 "<test a='&foo;'/>";
982
983 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
984 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
985 == XML_STATUS_ERROR)
986 xml_failure(g_parser);
987 }
988 END_TEST
989
990 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)991 START_TEST(test_unrecognised_encoding_internal_entity) {
992 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
993 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
994 "<test a='&foo;'/>";
995
996 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
997 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
998 != XML_STATUS_ERROR)
999 fail("Unrecognised encoding not rejected");
1000 }
1001 END_TEST
1002
1003 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)1004 START_TEST(test_ext_entity_set_encoding) {
1005 const char *text = "<!DOCTYPE doc [\n"
1006 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1007 "]>\n"
1008 "<doc>&en;</doc>";
1009 ExtTest test_data
1010 = {/* This text says it's an unsupported encoding, but it's really
1011 UTF-8, which we tell Expat using XML_SetEncoding().
1012 */
1013 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
1014 #ifdef XML_UNICODE
1015 const XML_Char *expected = XCS("\x00e9");
1016 #else
1017 const XML_Char *expected = XCS("\xc3\xa9");
1018 #endif
1019
1020 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1021 run_ext_character_check(text, &test_data, expected);
1022 }
1023 END_TEST
1024
1025 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)1026 START_TEST(test_ext_entity_no_handler) {
1027 const char *text = "<!DOCTYPE doc [\n"
1028 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1029 "]>\n"
1030 "<doc>&en;</doc>";
1031
1032 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1033 run_character_check(text, XCS(""));
1034 }
1035 END_TEST
1036
1037 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1038 START_TEST(test_ext_entity_set_bom) {
1039 const char *text = "<!DOCTYPE doc [\n"
1040 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1041 "]>\n"
1042 "<doc>&en;</doc>";
1043 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1044 "<?xml encoding='iso-8859-3'?>"
1045 "\xC3\xA9",
1046 XCS("utf-8"), NULL};
1047 #ifdef XML_UNICODE
1048 const XML_Char *expected = XCS("\x00e9");
1049 #else
1050 const XML_Char *expected = XCS("\xc3\xa9");
1051 #endif
1052
1053 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1054 run_ext_character_check(text, &test_data, expected);
1055 }
1056 END_TEST
1057
1058 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1059 START_TEST(test_ext_entity_bad_encoding) {
1060 const char *text = "<!DOCTYPE doc [\n"
1061 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1062 "]>\n"
1063 "<doc>&en;</doc>";
1064 ExtFaults fault
1065 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1066 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1067
1068 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1069 XML_SetUserData(g_parser, &fault);
1070 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1071 "Bad encoding should not have been accepted");
1072 }
1073 END_TEST
1074
1075 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1076 START_TEST(test_ext_entity_bad_encoding_2) {
1077 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1078 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1079 "<doc>&entity;</doc>";
1080 ExtFaults fault
1081 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1082 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1083
1084 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1085 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1086 XML_SetUserData(g_parser, &fault);
1087 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1088 "Bad encoding not faulted in external entity handler");
1089 }
1090 END_TEST
1091
1092 /* Test that no error is reported for unknown entities if we don't
1093 read an external subset. This was fixed in Expat 1.95.5.
1094 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1095 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1096 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1097 "<doc>&entity;</doc>";
1098
1099 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1100 == XML_STATUS_ERROR)
1101 xml_failure(g_parser);
1102 }
1103 END_TEST
1104
1105 /* Test that an error is reported for unknown entities if we don't
1106 have an external subset.
1107 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1108 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1109 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1110 "Parser did not report undefined entity w/out a DTD.");
1111 }
1112 END_TEST
1113
1114 /* Test that an error is reported for unknown entities if we don't
1115 read an external subset, but have been declared standalone.
1116 */
START_TEST(test_wfc_undeclared_entity_standalone)1117 START_TEST(test_wfc_undeclared_entity_standalone) {
1118 const char *text
1119 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1120 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1121 "<doc>&entity;</doc>";
1122
1123 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1124 "Parser did not report undefined entity (standalone).");
1125 }
1126 END_TEST
1127
1128 /* Test that an error is reported for unknown entities if we have read
1129 an external subset, and standalone is true.
1130 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1131 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1132 const char *text
1133 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1134 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1135 "<doc>&entity;</doc>";
1136 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1137
1138 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1139 XML_SetUserData(g_parser, &test_data);
1140 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1141 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1142 "Parser did not report undefined entity (external DTD).");
1143 }
1144 END_TEST
1145
1146 /* Test that external entity handling is not done if the parsing flag
1147 * is set to UNLESS_STANDALONE
1148 */
START_TEST(test_entity_with_external_subset_unless_standalone)1149 START_TEST(test_entity_with_external_subset_unless_standalone) {
1150 const char *text
1151 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1152 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1153 "<doc>&entity;</doc>";
1154 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1155
1156 XML_SetParamEntityParsing(g_parser,
1157 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1158 XML_SetUserData(g_parser, &test_data);
1159 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1160 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1161 "Parser did not report undefined entity");
1162 }
1163 END_TEST
1164
1165 /* Test that no error is reported for unknown entities if we have read
1166 an external subset, and standalone is false.
1167 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1168 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1169 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1170 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1171 "<doc>&entity;</doc>";
1172 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1173
1174 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1175 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1176 run_ext_character_check(text, &test_data, XCS(""));
1177 }
1178 END_TEST
1179
1180 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1181 START_TEST(test_not_standalone_handler_reject) {
1182 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1183 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1184 "<doc>&entity;</doc>";
1185 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1186
1187 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1188 XML_SetUserData(g_parser, &test_data);
1189 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1190 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1191 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1192 "NotStandalone handler failed to reject");
1193
1194 /* Try again but without external entity handling */
1195 XML_ParserReset(g_parser, NULL);
1196 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1197 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1198 "NotStandalone handler failed to reject");
1199 }
1200 END_TEST
1201
1202 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1203 START_TEST(test_not_standalone_handler_accept) {
1204 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1205 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1206 "<doc>&entity;</doc>";
1207 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1208
1209 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1210 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1211 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1212 run_ext_character_check(text, &test_data, XCS(""));
1213
1214 /* Repeat without the external entity handler */
1215 XML_ParserReset(g_parser, NULL);
1216 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1217 run_character_check(text, XCS(""));
1218 }
1219 END_TEST
1220
START_TEST(test_entity_start_tag_level_greater_than_one)1221 START_TEST(test_entity_start_tag_level_greater_than_one) {
1222 const char *const text = "<!DOCTYPE t1 [\n"
1223 " <!ENTITY e1 'hello'>\n"
1224 "]>\n"
1225 "<t1>\n"
1226 " <t2>&e1;</t2>\n"
1227 "</t1>\n";
1228
1229 XML_Parser parser = XML_ParserCreate(NULL);
1230 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
1231 /*isFinal*/ XML_TRUE)
1232 == XML_STATUS_OK);
1233 XML_ParserFree(parser);
1234 }
1235 END_TEST
1236
START_TEST(test_wfc_no_recursive_entity_refs)1237 START_TEST(test_wfc_no_recursive_entity_refs) {
1238 const char *text = "<!DOCTYPE doc [\n"
1239 " <!ENTITY entity '&entity;'>\n"
1240 "]>\n"
1241 "<doc>&entity;</doc>";
1242
1243 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1244 "Parser did not report recursive entity reference.");
1245 }
1246 END_TEST
1247
START_TEST(test_no_indirectly_recursive_entity_refs)1248 START_TEST(test_no_indirectly_recursive_entity_refs) {
1249 struct TestCase {
1250 const char *doc;
1251 bool usesParameterEntities;
1252 };
1253
1254 const struct TestCase cases[] = {
1255 // general entity + character data
1256 {"<!DOCTYPE a [\n"
1257 " <!ENTITY e1 '&e2;'>\n"
1258 " <!ENTITY e2 '&e1;'>\n"
1259 "]><a>&e2;</a>\n",
1260 false},
1261
1262 // general entity + attribute value
1263 {"<!DOCTYPE a [\n"
1264 " <!ENTITY e1 '&e2;'>\n"
1265 " <!ENTITY e2 '&e1;'>\n"
1266 "]><a k1='&e2;' />\n",
1267 false},
1268
1269 // parameter entity
1270 {"<!DOCTYPE doc [\n"
1271 " <!ENTITY % p1 '%p2;'>\n"
1272 " <!ENTITY % p2 '%p1;'>\n"
1273 " <!ENTITY % define_g \"<!ENTITY g '%p2;'>\">\n"
1274 " %define_g;\n"
1275 "]>\n"
1276 "<doc/>\n",
1277 true},
1278 };
1279 const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
1280
1281 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1282 for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
1283 j++) {
1284 const XML_Bool reset_wanted = reset_or_not[j];
1285 const char *const doc = cases[i].doc;
1286 const bool usesParameterEntities = cases[i].usesParameterEntities;
1287
1288 set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
1289
1290 #ifdef XML_DTD // both GE and DTD
1291 const bool rejection_expected = true;
1292 #elif XML_GE == 1 // GE but not DTD
1293 const bool rejection_expected = ! usesParameterEntities;
1294 #else // neither DTD nor GE
1295 const bool rejection_expected = false;
1296 #endif
1297
1298 XML_Parser parser = XML_ParserCreate(NULL);
1299
1300 #ifdef XML_DTD
1301 if (usesParameterEntities) {
1302 assert_true(
1303 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
1304 == 1);
1305 }
1306 #else
1307 UNUSED_P(usesParameterEntities);
1308 #endif // XML_DTD
1309
1310 const enum XML_Status status
1311 = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
1312 /*isFinal*/ XML_TRUE);
1313
1314 if (rejection_expected) {
1315 assert_true(status == XML_STATUS_ERROR);
1316 assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
1317 } else {
1318 assert_true(status == XML_STATUS_OK);
1319 }
1320
1321 if (reset_wanted) {
1322 // This covers free'ing of (eventually) all three open entity lists by
1323 // XML_ParserReset.
1324 XML_ParserReset(parser, NULL);
1325 }
1326
1327 // This covers free'ing of (eventually) all three open entity lists by
1328 // XML_ParserFree (unless XML_ParserReset has already done that above).
1329 XML_ParserFree(parser);
1330 }
1331 }
1332 }
1333 END_TEST
1334
START_TEST(test_recursive_external_parameter_entity_2)1335 START_TEST(test_recursive_external_parameter_entity_2) {
1336 struct TestCase {
1337 const char *doc;
1338 enum XML_Status expectedStatus;
1339 };
1340
1341 struct TestCase cases[] = {
1342 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1343 {"<!ENTITY % p1 '%p1;'>"
1344 "<!ENTITY % p1 'first declaration wins'>",
1345 XML_STATUS_ERROR},
1346 {"<!ENTITY % p1 'first declaration wins'>"
1347 "<!ENTITY % p1 '%p1;'>",
1348 XML_STATUS_OK},
1349 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK},
1350 };
1351
1352 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1353 const char *const doc = cases[i].doc;
1354 const enum XML_Status expectedStatus = cases[i].expectedStatus;
1355 set_subtest("%s", doc);
1356
1357 XML_Parser parser = XML_ParserCreate(NULL);
1358 assert_true(parser != NULL);
1359
1360 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1361 assert_true(ext_parser != NULL);
1362
1363 const enum XML_Status actualStatus
1364 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1365
1366 assert_true(actualStatus == expectedStatus);
1367 if (actualStatus != XML_STATUS_OK) {
1368 assert_true(XML_GetErrorCode(ext_parser)
1369 == XML_ERROR_RECURSIVE_ENTITY_REF);
1370 }
1371
1372 XML_ParserFree(ext_parser);
1373 XML_ParserFree(parser);
1374 }
1375 }
1376 END_TEST
1377
1378 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1379 START_TEST(test_ext_entity_invalid_parse) {
1380 const char *text = "<!DOCTYPE doc [\n"
1381 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1382 "]>\n"
1383 "<doc>&en;</doc>";
1384 const ExtFaults faults[]
1385 = {{"<", "Incomplete element declaration not faulted", NULL,
1386 XML_ERROR_UNCLOSED_TOKEN},
1387 {"<\xe2\x82", /* First two bytes of a three-byte char */
1388 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1389 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1390 XML_ERROR_PARTIAL_CHAR},
1391 {NULL, NULL, NULL, XML_ERROR_NONE}};
1392 const ExtFaults *fault = faults;
1393
1394 for (; fault->parse_text != NULL; fault++) {
1395 set_subtest("\"%s\"", fault->parse_text);
1396 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1397 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1398 XML_SetUserData(g_parser, (void *)fault);
1399 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1400 "Parser did not report external entity error");
1401 XML_ParserReset(g_parser, NULL);
1402 }
1403 }
1404 END_TEST
1405
1406 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1407 START_TEST(test_dtd_default_handling) {
1408 const char *text = "<!DOCTYPE doc [\n"
1409 "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1410 "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1411 "<!ELEMENT doc EMPTY>\n"
1412 "<!ATTLIST doc a CDATA #IMPLIED>\n"
1413 "<?pi in dtd?>\n"
1414 "<!--comment in dtd-->\n"
1415 "]><doc/>";
1416
1417 XML_SetDefaultHandler(g_parser, accumulate_characters);
1418 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1419 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1420 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1421 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1422 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1423 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1424 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1425 XML_SetCommentHandler(g_parser, dummy_comment_handler);
1426 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1427 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1428 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1429 }
1430 END_TEST
1431
1432 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1433 START_TEST(test_dtd_attr_handling) {
1434 const char *prolog = "<!DOCTYPE doc [\n"
1435 "<!ELEMENT doc EMPTY>\n";
1436 AttTest attr_data[]
1437 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1438 "]>"
1439 "<doc a='two'/>",
1440 XCS("doc"), XCS("a"),
1441 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1442 NULL, XML_TRUE},
1443 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1444 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1445 "]>"
1446 "<doc/>",
1447 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1448 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1449 "]>"
1450 "<doc/>",
1451 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1452 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1453 "]>"
1454 "<doc/>",
1455 XCS("doc"), XCS("a"), XCS("CDATA"),
1456 #ifdef XML_UNICODE
1457 XCS("\x06f2"),
1458 #else
1459 XCS("\xdb\xb2"),
1460 #endif
1461 XML_FALSE},
1462 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1463 AttTest *test;
1464
1465 for (test = attr_data; test->definition != NULL; test++) {
1466 set_subtest("%s", test->definition);
1467 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1468 XML_SetUserData(g_parser, test);
1469 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1470 XML_FALSE)
1471 == XML_STATUS_ERROR)
1472 xml_failure(g_parser);
1473 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1474 (int)strlen(test->definition), XML_TRUE)
1475 == XML_STATUS_ERROR)
1476 xml_failure(g_parser);
1477 XML_ParserReset(g_parser, NULL);
1478 }
1479 }
1480 END_TEST
1481
1482 /* See related SF bug #673791.
1483 When namespace processing is enabled, setting the namespace URI for
1484 a prefix is not allowed; this test ensures that it *is* allowed
1485 when namespace processing is not enabled.
1486 (See Namespaces in XML, section 2.)
1487 */
START_TEST(test_empty_ns_without_namespaces)1488 START_TEST(test_empty_ns_without_namespaces) {
1489 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1490 " <e xmlns:prefix=''/>\n"
1491 "</doc>";
1492
1493 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1494 == XML_STATUS_ERROR)
1495 xml_failure(g_parser);
1496 }
1497 END_TEST
1498
1499 /* Regression test for SF bug #824420.
1500 Checks that an xmlns:prefix attribute set in an attribute's default
1501 value isn't misinterpreted.
1502 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1503 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1504 const char *text = "<!DOCTYPE e:element [\n"
1505 " <!ATTLIST e:element\n"
1506 " xmlns:e CDATA 'http://example.org/'>\n"
1507 " ]>\n"
1508 "<e:element/>";
1509
1510 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1511 == XML_STATUS_ERROR)
1512 xml_failure(g_parser);
1513 }
1514 END_TEST
1515
1516 /* Regression test for SF bug #1515266: missing check of stopped
1517 parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1518 START_TEST(test_stop_parser_between_char_data_calls) {
1519 /* The sample data must be big enough that there are two calls to
1520 the character data handler from within the inner "for" loop of
1521 the XML_TOK_DATA_CHARS case in doContent(), and the character
1522 handler must stop the parser and clear the character data
1523 handler.
1524 */
1525 const char *text = long_character_data_text;
1526
1527 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1528 g_resumable = XML_FALSE;
1529 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1530 != XML_STATUS_ERROR)
1531 xml_failure(g_parser);
1532 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1533 xml_failure(g_parser);
1534 }
1535 END_TEST
1536
1537 /* Regression test for SF bug #1515266: missing check of stopped
1538 parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1539 START_TEST(test_suspend_parser_between_char_data_calls) {
1540 /* The sample data must be big enough that there are two calls to
1541 the character data handler from within the inner "for" loop of
1542 the XML_TOK_DATA_CHARS case in doContent(), and the character
1543 handler must stop the parser and clear the character data
1544 handler.
1545 */
1546 const char *text = long_character_data_text;
1547
1548 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1549 g_resumable = XML_TRUE;
1550 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1551 // we won't know exactly how much input we actually managed to give Expat.
1552 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1553 != XML_STATUS_SUSPENDED)
1554 xml_failure(g_parser);
1555 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1556 xml_failure(g_parser);
1557 /* Try parsing directly */
1558 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1559 != XML_STATUS_ERROR)
1560 fail("Attempt to continue parse while suspended not faulted");
1561 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1562 fail("Suspended parse not faulted with correct error");
1563 }
1564 END_TEST
1565
1566 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1567 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1568 const char *text = long_character_data_text;
1569
1570 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1571 g_resumable = XML_FALSE;
1572 g_abortable = XML_FALSE;
1573 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1574 != XML_STATUS_ERROR)
1575 fail("Failed to double-stop parser");
1576
1577 XML_ParserReset(g_parser, NULL);
1578 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1579 g_resumable = XML_TRUE;
1580 g_abortable = XML_FALSE;
1581 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1582 // we won't know exactly how much input we actually managed to give Expat.
1583 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1584 != XML_STATUS_SUSPENDED)
1585 fail("Failed to double-suspend parser");
1586
1587 XML_ParserReset(g_parser, NULL);
1588 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1589 g_resumable = XML_TRUE;
1590 g_abortable = XML_TRUE;
1591 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1592 != XML_STATUS_ERROR)
1593 fail("Failed to suspend-abort parser");
1594 }
1595 END_TEST
1596
START_TEST(test_good_cdata_ascii)1597 START_TEST(test_good_cdata_ascii) {
1598 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1599 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1600
1601 CharData storage;
1602 CharData_Init(&storage);
1603 XML_SetUserData(g_parser, &storage);
1604 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1605 /* Add start and end handlers for coverage */
1606 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1607 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1608
1609 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1610 == XML_STATUS_ERROR)
1611 xml_failure(g_parser);
1612 CharData_CheckXMLChars(&storage, expected);
1613
1614 /* Try again, this time with a default handler */
1615 XML_ParserReset(g_parser, NULL);
1616 CharData_Init(&storage);
1617 XML_SetUserData(g_parser, &storage);
1618 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1619 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1620
1621 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1622 == XML_STATUS_ERROR)
1623 xml_failure(g_parser);
1624 CharData_CheckXMLChars(&storage, expected);
1625 }
1626 END_TEST
1627
START_TEST(test_good_cdata_utf16)1628 START_TEST(test_good_cdata_utf16) {
1629 /* Test data is:
1630 * <?xml version='1.0' encoding='utf-16'?>
1631 * <a><![CDATA[hello]]></a>
1632 */
1633 const char text[]
1634 = "\0<\0?\0x\0m\0l\0"
1635 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1636 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1637 "1\0"
1638 "6\0'"
1639 "\0?\0>\0\n"
1640 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1641 const XML_Char *expected = XCS("hello");
1642
1643 CharData storage;
1644 CharData_Init(&storage);
1645 XML_SetUserData(g_parser, &storage);
1646 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1647
1648 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1649 == XML_STATUS_ERROR)
1650 xml_failure(g_parser);
1651 CharData_CheckXMLChars(&storage, expected);
1652 }
1653 END_TEST
1654
START_TEST(test_good_cdata_utf16_le)1655 START_TEST(test_good_cdata_utf16_le) {
1656 /* Test data is:
1657 * <?xml version='1.0' encoding='utf-16'?>
1658 * <a><![CDATA[hello]]></a>
1659 */
1660 const char text[]
1661 = "<\0?\0x\0m\0l\0"
1662 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1663 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1664 "1\0"
1665 "6\0'"
1666 "\0?\0>\0\n"
1667 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1668 const XML_Char *expected = XCS("hello");
1669
1670 CharData storage;
1671 CharData_Init(&storage);
1672 XML_SetUserData(g_parser, &storage);
1673 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1674
1675 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1676 == XML_STATUS_ERROR)
1677 xml_failure(g_parser);
1678 CharData_CheckXMLChars(&storage, expected);
1679 }
1680 END_TEST
1681
1682 /* Test UTF16 conversion of a long cdata string */
1683
1684 /* 16 characters: handy macro to reduce visual clutter */
1685 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1686
START_TEST(test_long_cdata_utf16)1687 START_TEST(test_long_cdata_utf16) {
1688 /* Test data is:
1689 * <?xlm version='1.0' encoding='utf-16'?>
1690 * <a><![CDATA[
1691 * ABCDEFGHIJKLMNOP
1692 * ]]></a>
1693 */
1694 const char text[]
1695 = "\0<\0?\0x\0m\0l\0 "
1696 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1697 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1698 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1699 /* 64 characters per line */
1700 /* clang-format off */
1701 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1702 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1703 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1704 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1705 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1706 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1707 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1708 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1709 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1710 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1711 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1712 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1713 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1714 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1715 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1716 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1717 A_TO_P_IN_UTF16
1718 /* clang-format on */
1719 "\0]\0]\0>\0<\0/\0a\0>";
1720 const XML_Char *expected =
1721 /* clang-format off */
1722 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1723 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1724 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1725 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1726 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1727 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1728 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1729 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1730 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1731 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1732 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1733 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1734 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1735 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1736 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1737 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1738 XCS("ABCDEFGHIJKLMNOP");
1739 /* clang-format on */
1740 CharData storage;
1741 void *buffer;
1742
1743 CharData_Init(&storage);
1744 XML_SetUserData(g_parser, &storage);
1745 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1746 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1747 if (buffer == NULL)
1748 fail("Could not allocate parse buffer");
1749 assert(buffer != NULL);
1750 memcpy(buffer, text, sizeof(text) - 1);
1751 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1752 xml_failure(g_parser);
1753 CharData_CheckXMLChars(&storage, expected);
1754 }
1755 END_TEST
1756
1757 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1758 START_TEST(test_multichar_cdata_utf16) {
1759 /* Test data is:
1760 * <?xml version='1.0' encoding='utf-16'?>
1761 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1762 *
1763 * where {MINIM} is U+1d15e (a minim or half-note)
1764 * UTF-16: 0xd834 0xdd5e
1765 * UTF-8: 0xf0 0x9d 0x85 0x9e
1766 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1767 * UTF-16: 0xd834 0xdd5f
1768 * UTF-8: 0xf0 0x9d 0x85 0x9f
1769 */
1770 const char text[] = "\0<\0?\0x\0m\0l\0"
1771 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1772 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1773 "1\0"
1774 "6\0'"
1775 "\0?\0>\0\n"
1776 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1777 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1778 "\0]\0]\0>\0<\0/\0a\0>";
1779 #ifdef XML_UNICODE
1780 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1781 #else
1782 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1783 #endif
1784 CharData storage;
1785
1786 CharData_Init(&storage);
1787 XML_SetUserData(g_parser, &storage);
1788 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1789
1790 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1791 == XML_STATUS_ERROR)
1792 xml_failure(g_parser);
1793 CharData_CheckXMLChars(&storage, expected);
1794 }
1795 END_TEST
1796
/* Test that a reversed UTF-16 surrogate pair in a CDATA section is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1798 START_TEST(test_utf16_bad_surrogate_pair) {
1799 /* Test data is:
1800 * <?xml version='1.0' encoding='utf-16'?>
1801 * <a><![CDATA[{BADLINB}]]></a>
1802 *
1803 * where {BADLINB} is U+10000 (the first Linear B character)
1804 * with the UTF-16 surrogate pair in the wrong order, i.e.
1805 * 0xdc00 0xd800
1806 */
1807 const char text[] = "\0<\0?\0x\0m\0l\0"
1808 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1809 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1810 "1\0"
1811 "6\0'"
1812 "\0?\0>\0\n"
1813 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1814 "\xdc\x00\xd8\x00"
1815 "\0]\0]\0>\0<\0/\0a\0>";
1816
1817 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1818 != XML_STATUS_ERROR)
1819 fail("Reversed UTF-16 surrogate pair not faulted");
1820 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1821 xml_failure(g_parser);
1822 }
1823 END_TEST
1824
START_TEST(test_bad_cdata)1825 START_TEST(test_bad_cdata) {
1826 struct CaseData {
1827 const char *text;
1828 enum XML_Error expectedError;
1829 };
1830
1831 struct CaseData cases[]
1832 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1833 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1834 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1835 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1836 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1837 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1838 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1839 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1840
1841 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1842 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1843 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1844
1845 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1846 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1847 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1848 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1849 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1850 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1851 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1852
1853 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1854 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1855 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1856
1857 size_t i = 0;
1858 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1859 set_subtest("%s", cases[i].text);
1860 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1861 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1862 const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1863
1864 assert(actualStatus == XML_STATUS_ERROR);
1865
1866 if (actualError != cases[i].expectedError) {
1867 char message[100];
1868 snprintf(message, sizeof(message),
1869 "Expected error %d but got error %d for case %u: \"%s\"\n",
1870 cases[i].expectedError, actualError, (unsigned int)i + 1,
1871 cases[i].text);
1872 fail(message);
1873 }
1874
1875 XML_ParserReset(g_parser, NULL);
1876 }
1877 }
1878 END_TEST
1879
1880 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1881 START_TEST(test_bad_cdata_utf16) {
1882 struct CaseData {
1883 size_t text_bytes;
1884 const char *text;
1885 enum XML_Error expected_error;
1886 };
1887
1888 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1889 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1890 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1891 "1\0"
1892 "6\0'"
1893 "\0?\0>\0\n"
1894 "\0<\0a\0>";
1895 struct CaseData cases[] = {
1896 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1897 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1898 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1899 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1900 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1901 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1902 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1903 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1904 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1905 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1906 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1907 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1908 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1909 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1910 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1911 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1912 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1913 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1914 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1915 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1916 /* Now add a four-byte UTF-16 character */
1917 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1918 XML_ERROR_UNCLOSED_CDATA_SECTION},
1919 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1920 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1921 XML_ERROR_PARTIAL_CHAR},
1922 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1923 XML_ERROR_UNCLOSED_CDATA_SECTION}};
1924 size_t i;
1925
1926 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1927 set_subtest("case %lu", (long unsigned)(i + 1));
1928 enum XML_Status actual_status;
1929 enum XML_Error actual_error;
1930
1931 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1932 XML_FALSE)
1933 == XML_STATUS_ERROR)
1934 xml_failure(g_parser);
1935 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1936 (int)cases[i].text_bytes, XML_TRUE);
1937 assert(actual_status == XML_STATUS_ERROR);
1938 actual_error = XML_GetErrorCode(g_parser);
1939 if (actual_error != cases[i].expected_error) {
1940 char message[1024];
1941
1942 snprintf(message, sizeof(message),
1943 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1944 ") for case %lu\n",
1945 cases[i].expected_error,
1946 XML_ErrorString(cases[i].expected_error), actual_error,
1947 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1948 fail(message);
1949 }
1950 XML_ParserReset(g_parser, NULL);
1951 }
1952 }
1953 END_TEST
1954
1955 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1956 START_TEST(test_stop_parser_between_cdata_calls) {
1957 const char *text = long_cdata_text;
1958
1959 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1960 g_resumable = XML_FALSE;
1961 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1962 }
1963 END_TEST
1964
1965 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1966 START_TEST(test_suspend_parser_between_cdata_calls) {
1967 if (g_chunkSize != 0) {
1968 // this test does not use SINGLE_BYTES, because of suspension
1969 return;
1970 }
1971
1972 const char *text = long_cdata_text;
1973 enum XML_Status result;
1974
1975 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1976 g_resumable = XML_TRUE;
1977 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1978 // we won't know exactly how much input we actually managed to give Expat.
1979 result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
1980 if (result != XML_STATUS_SUSPENDED) {
1981 if (result == XML_STATUS_ERROR)
1982 xml_failure(g_parser);
1983 fail("Parse not suspended in CDATA handler");
1984 }
1985 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1986 xml_failure(g_parser);
1987 }
1988 END_TEST
1989
1990 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1991 START_TEST(test_memory_allocation) {
1992 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1993 char *p;
1994
1995 if (buffer == NULL) {
1996 fail("Allocation failed");
1997 } else {
1998 /* Try writing to memory; some OSes try to cheat! */
1999 buffer[0] = 'T';
2000 buffer[1] = 'E';
2001 buffer[2] = 'S';
2002 buffer[3] = 'T';
2003 buffer[4] = '\0';
2004 if (strcmp(buffer, "TEST") != 0) {
2005 fail("Memory not writable");
2006 } else {
2007 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
2008 if (p == NULL) {
2009 fail("Reallocation failed");
2010 } else {
2011 /* Write again, just to be sure */
2012 buffer = p;
2013 buffer[0] = 'V';
2014 if (strcmp(buffer, "VEST") != 0) {
2015 fail("Reallocated memory not writable");
2016 }
2017 }
2018 }
2019 XML_MemFree(g_parser, buffer);
2020 }
2021 }
2022 END_TEST
2023
2024 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)2025 START_TEST(test_default_current) {
2026 const char *text = "<doc>hell]</doc>";
2027 const char *entity_text = "<!DOCTYPE doc [\n"
2028 "<!ENTITY entity '%'>\n"
2029 "]>\n"
2030 "<doc>&entity;</doc>";
2031
2032 set_subtest("with defaulting");
2033 {
2034 struct handler_record_list storage;
2035 storage.count = 0;
2036 XML_SetDefaultHandler(g_parser, record_default_handler);
2037 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2038 XML_SetUserData(g_parser, &storage);
2039 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2040 == XML_STATUS_ERROR)
2041 xml_failure(g_parser);
2042 int i = 0;
2043 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2044 // we should have gotten one or more cdata callbacks, totaling 5 chars
2045 int cdata_len_remaining = 5;
2046 while (cdata_len_remaining > 0) {
2047 const struct handler_record_entry *c_entry
2048 = handler_record_get(&storage, i++);
2049 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
2050 assert_true(c_entry->arg > 0);
2051 assert_true(c_entry->arg <= cdata_len_remaining);
2052 cdata_len_remaining -= c_entry->arg;
2053 // default handler must follow, with the exact same len argument.
2054 assert_record_handler_called(&storage, i++, "record_default_handler",
2055 c_entry->arg);
2056 }
2057 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2058 assert_true(storage.count == i);
2059 }
2060
2061 /* Again, without the defaulting */
2062 set_subtest("no defaulting");
2063 {
2064 struct handler_record_list storage;
2065 storage.count = 0;
2066 XML_ParserReset(g_parser, NULL);
2067 XML_SetDefaultHandler(g_parser, record_default_handler);
2068 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2069 XML_SetUserData(g_parser, &storage);
2070 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2071 == XML_STATUS_ERROR)
2072 xml_failure(g_parser);
2073 int i = 0;
2074 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2075 // we should have gotten one or more cdata callbacks, totaling 5 chars
2076 int cdata_len_remaining = 5;
2077 while (cdata_len_remaining > 0) {
2078 const struct handler_record_entry *c_entry
2079 = handler_record_get(&storage, i++);
2080 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
2081 assert_true(c_entry->arg > 0);
2082 assert_true(c_entry->arg <= cdata_len_remaining);
2083 cdata_len_remaining -= c_entry->arg;
2084 }
2085 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2086 assert_true(storage.count == i);
2087 }
2088
2089 /* Now with an internal entity to complicate matters */
2090 set_subtest("with internal entity");
2091 {
2092 struct handler_record_list storage;
2093 storage.count = 0;
2094 XML_ParserReset(g_parser, NULL);
2095 XML_SetDefaultHandler(g_parser, record_default_handler);
2096 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2097 XML_SetUserData(g_parser, &storage);
2098 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2099 XML_TRUE)
2100 == XML_STATUS_ERROR)
2101 xml_failure(g_parser);
2102 /* The default handler suppresses the entity */
2103 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2104 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2105 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2106 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2107 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2108 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2109 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2110 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2111 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2112 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2113 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2114 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2115 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2116 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2117 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2118 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2119 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2120 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
2121 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2122 assert_true(storage.count == 19);
2123 }
2124
2125 /* Again, with a skip handler */
2126 set_subtest("with skip handler");
2127 {
2128 struct handler_record_list storage;
2129 storage.count = 0;
2130 XML_ParserReset(g_parser, NULL);
2131 XML_SetDefaultHandler(g_parser, record_default_handler);
2132 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2133 XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
2134 XML_SetUserData(g_parser, &storage);
2135 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2136 XML_TRUE)
2137 == XML_STATUS_ERROR)
2138 xml_failure(g_parser);
2139 /* The default handler suppresses the entity */
2140 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2141 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2142 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2143 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2144 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2145 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2146 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2147 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2148 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2149 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2150 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2151 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2152 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2153 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2154 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2155 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2156 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2157 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2158 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2159 assert_true(storage.count == 19);
2160 }
2161
2162 /* This time, allow the entity through */
2163 set_subtest("allow entity");
2164 {
2165 struct handler_record_list storage;
2166 storage.count = 0;
2167 XML_ParserReset(g_parser, NULL);
2168 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2169 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2170 XML_SetUserData(g_parser, &storage);
2171 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2172 XML_TRUE)
2173 == XML_STATUS_ERROR)
2174 xml_failure(g_parser);
2175 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2176 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2177 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2178 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2179 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2180 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2181 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2182 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2183 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2184 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2185 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2186 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2187 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2188 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2189 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2190 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2191 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2192 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2193 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2194 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2195 assert_true(storage.count == 20);
2196 }
2197
2198 /* Finally, without passing the cdata to the default handler */
2199 set_subtest("not passing cdata");
2200 {
2201 struct handler_record_list storage;
2202 storage.count = 0;
2203 XML_ParserReset(g_parser, NULL);
2204 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2205 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2206 XML_SetUserData(g_parser, &storage);
2207 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2208 XML_TRUE)
2209 == XML_STATUS_ERROR)
2210 xml_failure(g_parser);
2211 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2212 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2213 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2214 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2215 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2216 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2217 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2218 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2219 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2220 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2221 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2222 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2223 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2224 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2225 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2226 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2227 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2228 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2229 1);
2230 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2231 assert_true(storage.count == 19);
2232 }
2233 }
2234 END_TEST
2235
2236 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2237 START_TEST(test_dtd_elements) {
2238 const char *text = "<!DOCTYPE doc [\n"
2239 "<!ELEMENT doc (chapter)>\n"
2240 "<!ELEMENT chapter (#PCDATA)>\n"
2241 "]>\n"
2242 "<doc><chapter>Wombats are go</chapter></doc>";
2243
2244 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2245 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2246 == XML_STATUS_ERROR)
2247 xml_failure(g_parser);
2248 }
2249 END_TEST
2250
2251 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2252 element_decl_check_model(void *userData, const XML_Char *name,
2253 XML_Content *model) {
2254 UNUSED_P(userData);
2255 uint32_t errorFlags = 0;
2256
2257 /* Expected model array structure is this:
2258 * [0] (type 6, quant 0)
2259 * [1] (type 5, quant 0)
2260 * [3] (type 4, quant 0, name "bar")
2261 * [4] (type 4, quant 0, name "foo")
2262 * [5] (type 4, quant 3, name "xyz")
2263 * [2] (type 4, quant 2, name "zebra")
2264 */
2265 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2266 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2267
2268 if (model != NULL) {
2269 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2270 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2271 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2272 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2273 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2274
2275 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2276 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2277 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2278 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2279 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2280
2281 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2282 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2283 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2284 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2285 errorFlags
2286 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2287
2288 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2289 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2290 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2291 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2292 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2293
2294 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2295 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2296 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2297 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2298 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2299
2300 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2301 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2302 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2303 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2304 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2305 }
2306
2307 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2308 XML_FreeContentModel(g_parser, model);
2309 }
2310
START_TEST(test_dtd_elements_nesting)2311 START_TEST(test_dtd_elements_nesting) {
2312 // Payload inspired by a test in Perl's XML::Parser
2313 const char *text = "<!DOCTYPE foo [\n"
2314 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2315 "]>\n"
2316 "<foo/>";
2317
2318 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2319
2320 XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2321 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2322 == XML_STATUS_ERROR)
2323 xml_failure(g_parser);
2324
2325 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2326 fail("Element declaration model regression detected");
2327 }
2328 END_TEST
2329
2330 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2331 START_TEST(test_set_foreign_dtd) {
2332 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2333 const char *text2 = "<doc>&entity;</doc>";
2334 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2335
2336 /* Check hash salt is passed through too */
2337 XML_SetHashSalt(g_parser, 0x12345678);
2338 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2339 XML_SetUserData(g_parser, &test_data);
2340 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2341 /* Add a default handler to exercise more code paths */
2342 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2343 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2344 fail("Could not set foreign DTD");
2345 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2346 == XML_STATUS_ERROR)
2347 xml_failure(g_parser);
2348
2349 /* Ensure that trying to set the DTD after parsing has started
2350 * is faulted, even if it's the same setting.
2351 */
2352 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2353 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2354 fail("Failed to reject late foreign DTD setting");
2355 /* Ditto for the hash salt */
2356 if (XML_SetHashSalt(g_parser, 0x23456789))
2357 fail("Failed to reject late hash salt change");
2358
2359 /* Now finish the parse */
2360 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2361 == XML_STATUS_ERROR)
2362 xml_failure(g_parser);
2363 }
2364 END_TEST
2365
2366 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2367 START_TEST(test_foreign_dtd_not_standalone) {
2368 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2369 "<doc>&entity;</doc>";
2370 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2371
2372 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2373 XML_SetUserData(g_parser, &test_data);
2374 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2375 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2376 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2377 fail("Could not set foreign DTD");
2378 expect_failure(text, XML_ERROR_NOT_STANDALONE,
2379 "NotStandalonehandler failed to reject");
2380 }
2381 END_TEST
2382
2383 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2384 START_TEST(test_invalid_foreign_dtd) {
2385 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2386 "<doc>&entity;</doc>";
2387 ExtFaults test_data
2388 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2389
2390 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2391 XML_SetUserData(g_parser, &test_data);
2392 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2393 XML_UseForeignDTD(g_parser, XML_TRUE);
2394 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2395 "Bad DTD should not have been accepted");
2396 }
2397 END_TEST
2398
2399 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2400 START_TEST(test_foreign_dtd_with_doctype) {
2401 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2402 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2403 const char *text2 = "<doc>&entity;</doc>";
2404 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2405
2406 /* Check hash salt is passed through too */
2407 XML_SetHashSalt(g_parser, 0x12345678);
2408 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2409 XML_SetUserData(g_parser, &test_data);
2410 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2411 /* Add a default handler to exercise more code paths */
2412 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2413 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2414 fail("Could not set foreign DTD");
2415 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2416 == XML_STATUS_ERROR)
2417 xml_failure(g_parser);
2418
2419 /* Ensure that trying to set the DTD after parsing has started
2420 * is faulted, even if it's the same setting.
2421 */
2422 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2423 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2424 fail("Failed to reject late foreign DTD setting");
2425 /* Ditto for the hash salt */
2426 if (XML_SetHashSalt(g_parser, 0x23456789))
2427 fail("Failed to reject late hash salt change");
2428
2429 /* Now finish the parse */
2430 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2431 == XML_STATUS_ERROR)
2432 xml_failure(g_parser);
2433 }
2434 END_TEST
2435
2436 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2437 START_TEST(test_foreign_dtd_without_external_subset) {
2438 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2439 "<doc>&foo;</doc>";
2440
2441 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2442 XML_SetUserData(g_parser, NULL);
2443 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2444 XML_UseForeignDTD(g_parser, XML_TRUE);
2445 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2446 == XML_STATUS_ERROR)
2447 xml_failure(g_parser);
2448 }
2449 END_TEST
2450
START_TEST(test_empty_foreign_dtd)2451 START_TEST(test_empty_foreign_dtd) {
2452 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2453 "<doc>&entity;</doc>";
2454
2455 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2456 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2457 XML_UseForeignDTD(g_parser, XML_TRUE);
2458 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2459 "Undefined entity not faulted");
2460 }
2461 END_TEST
2462
2463 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2464 START_TEST(test_set_base) {
2465 const XML_Char *old_base;
2466 const XML_Char *new_base = XCS("/local/file/name.xml");
2467
2468 old_base = XML_GetBase(g_parser);
2469 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2470 fail("Unable to set base");
2471 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2472 fail("Base setting not correct");
2473 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2474 fail("Unable to NULL base");
2475 if (XML_GetBase(g_parser) != NULL)
2476 fail("Base setting not nulled");
2477 XML_SetBase(g_parser, old_base);
2478 }
2479 END_TEST
2480
2481 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2482 START_TEST(test_attributes) {
2483 const char *text = "<!DOCTYPE doc [\n"
2484 "<!ELEMENT doc (tag)>\n"
2485 "<!ATTLIST doc id ID #REQUIRED>\n"
2486 "]>"
2487 "<doc a='1' id='one' b='2'>"
2488 "<tag c='3'/>"
2489 "</doc>";
2490 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2491 {XCS("b"), XCS("2")},
2492 {XCS("id"), XCS("one")},
2493 {NULL, NULL}};
2494 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2495 ElementInfo info[] = {{XCS("doc"), 3, 0, XCS("id"), doc_info},
2496 {XCS("tag"), 1, 0, NULL, tag_info},
2497 {NULL, 0, 0, NULL, NULL}};
2498
2499 XML_Parser parser = XML_ParserCreate(NULL);
2500 assert_true(parser != NULL);
2501 ParserAndElementInfo parserAndElementInfos = {
2502 parser,
2503 info,
2504 };
2505
2506 XML_SetStartElementHandler(parser, counting_start_element_handler);
2507 XML_SetUserData(parser, &parserAndElementInfos);
2508 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2509 == XML_STATUS_ERROR)
2510 xml_failure(parser);
2511
2512 XML_ParserFree(parser);
2513 }
2514 END_TEST
2515
START_TEST(test_duplicate_cdata_attribute)2516 START_TEST(test_duplicate_cdata_attribute) {
2517 /*
2518 https://www.w3.org/TR/xml/#attdecls
2519
2520 Test the following statement from the linked specification:
2521 When more than one definition is provided for the same attribute of a given
2522 element type, the first declaration is binding and later declarations are
2523 ignored.
2524 */
2525
2526 const char *text
2527 = "<!DOCTYPE doc [\n"
2528 " <!ATTLIST doc attribute CDATA 'expected' attribute CDATA 'ignored'>\n"
2529 "]>\n"
2530 "<doc/>\n";
2531 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
2532 ElementInfo info[]
2533 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2534
2535 XML_Parser parser = XML_ParserCreate(NULL);
2536 assert_true(parser != NULL);
2537
2538 ParserAndElementInfo parserAndElementInfos = {
2539 parser,
2540 info,
2541 };
2542
2543 XML_SetStartElementHandler(parser, counting_start_element_handler);
2544 XML_SetUserData(parser, &parserAndElementInfos);
2545
2546 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2547 != XML_STATUS_OK)
2548 xml_failure(parser);
2549
2550 XML_ParserFree(parser);
2551 }
2552 END_TEST
2553
START_TEST(test_duplicate_id_attribute_1)2554 START_TEST(test_duplicate_id_attribute_1) {
2555 /*
2556 https://www.w3.org/TR/xml/#attdecls
2557
2558 Test the following statement from the linked specification:
2559 When more than one definition is provided for the same attribute of a given
2560 element type, the first declaration is binding and later declarations are
2561 ignored.
2562 */
2563
2564 const char *text
2565 = "<!DOCTYPE doc [\n"
2566 " <!ATTLIST doc identifier CDATA 'expected' identifier ID #REQUIRED>\n"
2567 "]>\n"
2568 "<doc/>\n";
2569 AttrInfo doc_info[] = {{XCS("identifier"), XCS("expected")}, {NULL, NULL}};
2570 ElementInfo info[]
2571 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2572
2573 XML_Parser parser = XML_ParserCreate(NULL);
2574 assert_true(parser != NULL);
2575
2576 ParserAndElementInfo parserAndElementInfos = {
2577 parser,
2578 info,
2579 };
2580
2581 XML_SetStartElementHandler(parser, counting_start_element_handler);
2582 XML_SetUserData(parser, &parserAndElementInfos);
2583
2584 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2585 != XML_STATUS_OK)
2586 xml_failure(parser);
2587
2588 XML_ParserFree(parser);
2589 }
2590 END_TEST
2591
START_TEST(test_duplicate_id_attribute_2)2592 START_TEST(test_duplicate_id_attribute_2) {
2593 /*
2594 https://www.w3.org/TR/xml/#attdecls
2595
2596 Test the following statement from the linked specification:
2597 When more than one definition is provided for the same attribute of a given
2598 element type, the first declaration is binding and later declarations are
2599 ignored.
2600 */
2601
2602 const char *text
2603 = "<!DOCTYPE doc [\n"
2604 " <!ATTLIST doc identifier ID #REQUIRED identifier CDATA 'unexpected'>\n"
2605 "]>\n"
2606 "<doc/>\n";
2607 AttrInfo doc_info[] = {{NULL, NULL}};
2608
2609 ElementInfo info[]
2610 = {{XCS("doc"), 0, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2611
2612 XML_Parser parser = XML_ParserCreate(NULL);
2613 assert_true(parser != NULL);
2614
2615 ParserAndElementInfo parserAndElementInfos = {
2616 parser,
2617 info,
2618 };
2619
2620 XML_SetStartElementHandler(parser, counting_start_element_handler);
2621 XML_SetUserData(parser, &parserAndElementInfos);
2622
2623 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2624 != XML_STATUS_OK)
2625 xml_failure(parser);
2626
2627 XML_ParserFree(parser);
2628 }
2629 END_TEST
2630
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl)2631 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl) {
2632 /*
2633 https://www.w3.org/TR/xml/#attdecls
2634
2635 Test the following statement from the linked specification:
2636 When more than one AttlistDecl is provided for a given element type,
2637 the contents of all those provided are merged.
2638 */
2639 const char *text = "<!DOCTYPE doc [\n"
2640 " <!ATTLIST doc attribute CDATA 'expected'>\n"
2641 " <!ATTLIST doc attribute CDATA 'ignored'>\n"
2642 "]>\n"
2643 "<doc/>\n";
2644 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
2645 ElementInfo info[]
2646 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2647
2648 XML_Parser parser = XML_ParserCreate(NULL);
2649 assert_true(parser != NULL);
2650
2651 ParserAndElementInfo parserAndElementInfos = {
2652 parser,
2653 info,
2654 };
2655
2656 XML_SetStartElementHandler(parser, counting_start_element_handler);
2657 XML_SetUserData(parser, &parserAndElementInfos);
2658
2659 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2660 != XML_STATUS_OK)
2661 xml_failure(parser);
2662
2663 XML_ParserFree(parser);
2664 }
2665 END_TEST
2666
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2)2667 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2) {
2668 /*
2669 https://www.w3.org/TR/xml/#attdecls
2670
2671 Test the following statement from the linked specification:
2672 When more than one AttlistDecl is provided for a given element type,
2673 the contents of all those provided are merged.
2674 */
2675 const char *text = "<!DOCTYPE doc [\n"
2676 " <!ATTLIST doc attribute CDATA 'expected_doc'>\n"
2677 " <!ATTLIST tag attribute CDATA 'expected_tag'>\n"
2678 " <!ATTLIST doc attribute CDATA 'ignored_doc'>\n"
2679 "]>\n"
2680 "<doc><tag></tag></doc>\n";
2681 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")}, {NULL, NULL}};
2682 AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}};
2683 ElementInfo info[] = {{XCS("doc"), 0, 1, NULL, doc_info},
2684 {XCS("tag"), 0, 1, NULL, tag_info},
2685 {NULL, 0, 0, NULL, NULL}};
2686
2687 XML_Parser parser = XML_ParserCreate(NULL);
2688 assert_true(parser != NULL);
2689
2690 ParserAndElementInfo parserAndElementInfos = {
2691 parser,
2692 info,
2693 };
2694
2695 XML_SetStartElementHandler(parser, counting_start_element_handler);
2696 XML_SetUserData(parser, &parserAndElementInfos);
2697
2698 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2699 != XML_STATUS_OK)
2700 xml_failure(parser);
2701
2702 XML_ParserFree(parser);
2703 }
2704 END_TEST
2705
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3)2706 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3) {
2707 /*
2708 https://www.w3.org/TR/xml/#attdecls
2709
2710 Test the following statement from the linked specification:
2711 When more than one AttlistDecl is provided for a given element type,
2712 the contents of all those provided are merged.
2713 */
2714 const char *text
2715 = "<!DOCTYPE doc [\n"
2716 " <!ATTLIST doc attribute CDATA 'expected_doc'>\n"
2717 " <!ATTLIST tag attribute CDATA 'expected_tag'>\n"
2718 " <!ATTLIST doc second_attribute CDATA 'second_expected_doc' attribute CDATA 'ignored_doc'>\n"
2719 "]>\n"
2720 "<doc><tag></tag></doc>\n";
2721 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")},
2722 {XCS("second_attribute"), XCS("second_expected_doc")},
2723 {NULL, NULL}};
2724 AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}};
2725 ElementInfo info[] = {{XCS("doc"), 0, 2, NULL, doc_info},
2726 {XCS("tag"), 0, 1, NULL, tag_info},
2727 {NULL, 0, 0, NULL, NULL}};
2728
2729 XML_Parser parser = XML_ParserCreate(NULL);
2730 assert_true(parser != NULL);
2731
2732 ParserAndElementInfo parserAndElementInfos = {
2733 parser,
2734 info,
2735 };
2736
2737 XML_SetStartElementHandler(parser, counting_start_element_handler);
2738 XML_SetUserData(parser, &parserAndElementInfos);
2739
2740 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2741 != XML_STATUS_OK)
2742 xml_failure(parser);
2743
2744 XML_ParserFree(parser);
2745 }
2746 END_TEST
2747
START_TEST(test_duplicate_id_attribute_multiple_attlistdecl)2748 START_TEST(test_duplicate_id_attribute_multiple_attlistdecl) {
2749 /*
2750 https://www.w3.org/TR/xml/#attdecls
2751
2752 Test the following statement from the linked specification:
2753 When more than one AttlistDecl is provided for a given element type,
2754 the contents of all those provided are merged.
2755 */
2756 const char *text = "<!DOCTYPE doc [\n"
2757 " <!ATTLIST doc identifier ID #REQUIRED>\n"
2758 " <!ATTLIST tag identifier CDATA 'identifier_tag'>\n"
2759 " <!ATTLIST doc identifier CDATA 'ignored'>\n"
2760 "]>\n"
2761 "<doc identifier='doc_identity'><tag></tag></doc>\n";
2762 AttrInfo doc_info[]
2763 = {{XCS("identifier"), XCS("doc_identity")}, {NULL, NULL}};
2764 AttrInfo tag_info[]
2765 = {{XCS("identifier"), XCS("identifier_tag")}, {NULL, NULL}};
2766 ElementInfo info[] = {{XCS("doc"), 1, 0, XCS("identifier"), doc_info},
2767 {XCS("tag"), 0, 1, NULL, tag_info},
2768 {NULL, 0, 0, NULL, NULL}};
2769
2770 XML_Parser parser = XML_ParserCreate(NULL);
2771 assert_true(parser != NULL);
2772
2773 ParserAndElementInfo parserAndElementInfos = {
2774 parser,
2775 info,
2776 };
2777
2778 XML_SetStartElementHandler(parser, counting_start_element_handler);
2779 XML_SetUserData(parser, &parserAndElementInfos);
2780
2781 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2782 != XML_STATUS_OK)
2783 xml_failure(parser);
2784
2785 XML_ParserFree(parser);
2786 }
2787 END_TEST
2788
2789 /* Test reset works correctly in the middle of processing an internal
2790 * entity. Exercises some obscure code in XML_ParserReset().
2791 */
START_TEST(test_reset_in_entity)2792 START_TEST(test_reset_in_entity) {
2793 if (g_chunkSize != 0) {
2794 // this test does not use SINGLE_BYTES, because of suspension
2795 return;
2796 }
2797
2798 const char *text = "<!DOCTYPE doc [\n"
2799 "<!ENTITY wombat 'wom'>\n"
2800 "<!ENTITY entity 'hi &wom; there'>\n"
2801 "]>\n"
2802 "<doc>&entity;</doc>";
2803 XML_ParsingStatus status;
2804
2805 g_resumable = XML_TRUE;
2806 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2807 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
2808 // we won't know exactly how much input we actually managed to give Expat.
2809 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2810 == XML_STATUS_ERROR)
2811 xml_failure(g_parser);
2812 XML_GetParsingStatus(g_parser, &status);
2813 if (status.parsing != XML_SUSPENDED)
2814 fail("Parsing status not SUSPENDED");
2815 XML_ParserReset(g_parser, NULL);
2816 XML_GetParsingStatus(g_parser, &status);
2817 if (status.parsing != XML_INITIALIZED)
2818 fail("Parsing status doesn't reset to INITIALIZED");
2819 }
2820 END_TEST
2821
2822 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2823 START_TEST(test_resume_invalid_parse) {
2824 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2825
2826 g_resumable = XML_TRUE;
2827 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2828 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2829 == XML_STATUS_ERROR)
2830 xml_failure(g_parser);
2831 if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2832 fail("Resumed invalid parse not faulted");
2833 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2834 fail("Invalid parse not correctly faulted");
2835 }
2836 END_TEST
2837
2838 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2839 START_TEST(test_resume_resuspended) {
2840 const char *text = "<doc>Hello<meep/>world</doc>";
2841
2842 g_resumable = XML_TRUE;
2843 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2844 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2845 == XML_STATUS_ERROR)
2846 xml_failure(g_parser);
2847 g_resumable = XML_TRUE;
2848 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2849 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2850 fail("Resumption not suspended");
2851 /* This one should succeed and finish up */
2852 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2853 xml_failure(g_parser);
2854 }
2855 END_TEST
2856
2857 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2858 START_TEST(test_cdata_default) {
2859 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2860 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2861 CharData storage;
2862
2863 CharData_Init(&storage);
2864 XML_SetUserData(g_parser, &storage);
2865 XML_SetDefaultHandler(g_parser, accumulate_characters);
2866
2867 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2868 == XML_STATUS_ERROR)
2869 xml_failure(g_parser);
2870 CharData_CheckXMLChars(&storage, expected);
2871 }
2872 END_TEST
2873
2874 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2875 START_TEST(test_subordinate_reset) {
2876 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2877 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2878 "<doc>&entity;</doc>";
2879
2880 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2881 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2882 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2883 == XML_STATUS_ERROR)
2884 xml_failure(g_parser);
2885 }
2886 END_TEST
2887
2888 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2889 START_TEST(test_subordinate_suspend) {
2890 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2891 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2892 "<doc>&entity;</doc>";
2893
2894 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2895 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2896 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2897 == XML_STATUS_ERROR)
2898 xml_failure(g_parser);
2899 }
2900 END_TEST
2901
2902 /* Test suspending a subordinate parser from an XML declaration */
2903 /* Increases code coverage of the tests */
2904
START_TEST(test_subordinate_xdecl_suspend)2905 START_TEST(test_subordinate_xdecl_suspend) {
2906 const char *text
2907 = "<!DOCTYPE doc [\n"
2908 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2909 "]>\n"
2910 "<doc>&entity;</doc>";
2911
2912 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2913 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2914 g_resumable = XML_TRUE;
2915 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2916 == XML_STATUS_ERROR)
2917 xml_failure(g_parser);
2918 }
2919 END_TEST
2920
START_TEST(test_subordinate_xdecl_abort)2921 START_TEST(test_subordinate_xdecl_abort) {
2922 const char *text
2923 = "<!DOCTYPE doc [\n"
2924 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2925 "]>\n"
2926 "<doc>&entity;</doc>";
2927
2928 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2929 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2930 g_resumable = XML_FALSE;
2931 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2932 == XML_STATUS_ERROR)
2933 xml_failure(g_parser);
2934 }
2935 END_TEST
2936
2937 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2938 START_TEST(test_ext_entity_invalid_suspended_parse) {
2939 const char *text = "<!DOCTYPE doc [\n"
2940 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2941 "]>\n"
2942 "<doc>&en;</doc>";
2943 ExtFaults faults[]
2944 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2945 "Incomplete element declaration not faulted", NULL,
2946 XML_ERROR_UNCLOSED_TOKEN},
2947 {/* First two bytes of a three-byte char */
2948 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2949 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2950 {NULL, NULL, NULL, XML_ERROR_NONE}};
2951 ExtFaults *fault;
2952
2953 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2954 set_subtest("%s", fault->parse_text);
2955 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2956 XML_SetExternalEntityRefHandler(g_parser,
2957 external_entity_suspending_faulter);
2958 XML_SetUserData(g_parser, fault);
2959 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2960 "Parser did not report external entity error");
2961 XML_ParserReset(g_parser, NULL);
2962 }
2963 }
2964 END_TEST
2965
2966 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2967 START_TEST(test_explicit_encoding) {
2968 const char *text1 = "<doc>Hello ";
2969 const char *text2 = " World</doc>";
2970
2971 /* Just check that we can set the encoding to NULL before starting */
2972 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2973 fail("Failed to initialise encoding to NULL");
2974 /* Say we are UTF-8 */
2975 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2976 fail("Failed to set explicit encoding");
2977 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2978 == XML_STATUS_ERROR)
2979 xml_failure(g_parser);
2980 /* Try to switch encodings mid-parse */
2981 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2982 fail("Allowed encoding change");
2983 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2984 == XML_STATUS_ERROR)
2985 xml_failure(g_parser);
2986 /* Try now the parse is over */
2987 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2988 fail("Failed to unset encoding");
2989 }
2990 END_TEST
2991
2992 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2993 START_TEST(test_trailing_cr) {
2994 const char *text = "<doc>\r";
2995 int found_cr;
2996
2997 /* Try with a character handler, for code coverage */
2998 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2999 XML_SetUserData(g_parser, &found_cr);
3000 found_cr = 0;
3001 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3002 == XML_STATUS_OK)
3003 fail("Failed to fault unclosed doc");
3004 if (found_cr == 0)
3005 fail("Did not catch the carriage return");
3006 XML_ParserReset(g_parser, NULL);
3007
3008 /* Now with a default handler instead */
3009 XML_SetDefaultHandler(g_parser, cr_cdata_handler);
3010 XML_SetUserData(g_parser, &found_cr);
3011 found_cr = 0;
3012 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3013 == XML_STATUS_OK)
3014 fail("Failed to fault unclosed doc");
3015 if (found_cr == 0)
3016 fail("Did not catch default carriage return");
3017 }
3018 END_TEST
3019
3020 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)3021 START_TEST(test_ext_entity_trailing_cr) {
3022 const char *text = "<!DOCTYPE doc [\n"
3023 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3024 "]>\n"
3025 "<doc>&en;</doc>";
3026 int found_cr;
3027
3028 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3029 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
3030 XML_SetUserData(g_parser, &found_cr);
3031 found_cr = 0;
3032 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3033 != XML_STATUS_OK)
3034 xml_failure(g_parser);
3035 if (found_cr == 0)
3036 fail("No carriage return found");
3037 XML_ParserReset(g_parser, NULL);
3038
3039 /* Try again with a different trailing CR */
3040 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3041 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
3042 XML_SetUserData(g_parser, &found_cr);
3043 found_cr = 0;
3044 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3045 != XML_STATUS_OK)
3046 xml_failure(g_parser);
3047 if (found_cr == 0)
3048 fail("No carriage return found");
3049 }
3050 END_TEST
3051
3052 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)3053 START_TEST(test_trailing_rsqb) {
3054 const char *text8 = "<doc>]";
3055 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
3056 int found_rsqb;
3057 int text8_len = (int)strlen(text8);
3058
3059 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
3060 XML_SetUserData(g_parser, &found_rsqb);
3061 found_rsqb = 0;
3062 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
3063 == XML_STATUS_OK)
3064 fail("Failed to fault unclosed doc");
3065 if (found_rsqb == 0)
3066 fail("Did not catch the right square bracket");
3067
3068 /* Try again with a different encoding */
3069 XML_ParserReset(g_parser, NULL);
3070 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
3071 XML_SetUserData(g_parser, &found_rsqb);
3072 found_rsqb = 0;
3073 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
3074 XML_TRUE)
3075 == XML_STATUS_OK)
3076 fail("Failed to fault unclosed doc");
3077 if (found_rsqb == 0)
3078 fail("Did not catch the right square bracket");
3079
3080 /* And finally with a default handler */
3081 XML_ParserReset(g_parser, NULL);
3082 XML_SetDefaultHandler(g_parser, rsqb_handler);
3083 XML_SetUserData(g_parser, &found_rsqb);
3084 found_rsqb = 0;
3085 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
3086 XML_TRUE)
3087 == XML_STATUS_OK)
3088 fail("Failed to fault unclosed doc");
3089 if (found_rsqb == 0)
3090 fail("Did not catch the right square bracket");
3091 }
3092 END_TEST
3093
3094 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)3095 START_TEST(test_ext_entity_trailing_rsqb) {
3096 const char *text = "<!DOCTYPE doc [\n"
3097 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3098 "]>\n"
3099 "<doc>&en;</doc>";
3100 int found_rsqb;
3101
3102 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3103 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
3104 XML_SetUserData(g_parser, &found_rsqb);
3105 found_rsqb = 0;
3106 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3107 != XML_STATUS_OK)
3108 xml_failure(g_parser);
3109 if (found_rsqb == 0)
3110 fail("No right square bracket found");
3111 }
3112 END_TEST
3113
3114 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)3115 START_TEST(test_ext_entity_good_cdata) {
3116 const char *text = "<!DOCTYPE doc [\n"
3117 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3118 "]>\n"
3119 "<doc>&en;</doc>";
3120
3121 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3122 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
3123 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3124 != XML_STATUS_OK)
3125 xml_failure(g_parser);
3126 }
3127 END_TEST
3128
3129 /* Test user parameter settings */
START_TEST(test_user_parameters)3130 START_TEST(test_user_parameters) {
3131 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
3132 "<!-- Primary parse -->\n"
3133 "<!DOCTYPE doc SYSTEM 'foo'>\n"
3134 "<doc>&entity;";
3135 const char *epilog = "<!-- Back to primary parser -->\n"
3136 "</doc>";
3137
3138 g_comment_count = 0;
3139 g_skip_count = 0;
3140 g_xdecl_count = 0;
3141 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3142 XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
3143 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
3144 XML_SetCommentHandler(g_parser, data_check_comment_handler);
3145 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
3146 XML_UseParserAsHandlerArg(g_parser);
3147 XML_SetUserData(g_parser, (void *)1);
3148 g_handler_data = g_parser;
3149 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3150 == XML_STATUS_ERROR)
3151 xml_failure(g_parser);
3152 /* Ensure we can't change policy mid-parse */
3153 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
3154 fail("Changed param entity parsing policy while parsing");
3155 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
3156 == XML_STATUS_ERROR)
3157 xml_failure(g_parser);
3158 if (g_comment_count != 3)
3159 fail("Comment handler not invoked enough times");
3160 if (g_skip_count != 1)
3161 fail("Skip handler not invoked enough times");
3162 if (g_xdecl_count != 1)
3163 fail("XML declaration handler not invoked");
3164 }
3165 END_TEST
3166
3167 /* Test that an explicit external entity handler argument replaces
3168 * the parser as the first argument.
3169 *
3170 * We do not call the first parameter to the external entity handler
3171 * 'parser' for once, since the first time the handler is called it
3172 * will actually be a text string. We need to be able to access the
3173 * global 'parser' variable to create our external entity parser from,
3174 * since there are code paths we need to ensure get executed.
3175 */
START_TEST(test_ext_entity_ref_parameter)3176 START_TEST(test_ext_entity_ref_parameter) {
3177 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
3178 "<!DOCTYPE doc SYSTEM 'foo'>\n"
3179 "<doc>&entity;</doc>";
3180
3181 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3182 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
3183 /* Set a handler arg that is not NULL and not parser (which is
3184 * what NULL would cause to be passed.
3185 */
3186 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
3187 g_handler_data = text;
3188 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3189 == XML_STATUS_ERROR)
3190 xml_failure(g_parser);
3191
3192 /* Now try again with unset args */
3193 XML_ParserReset(g_parser, NULL);
3194 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3195 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
3196 XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
3197 g_handler_data = g_parser;
3198 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3199 == XML_STATUS_ERROR)
3200 xml_failure(g_parser);
3201 }
3202 END_TEST
3203
3204 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)3205 START_TEST(test_empty_parse) {
3206 const char *text = "<doc></doc>";
3207 const char *partial = "<doc>";
3208
3209 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
3210 fail("Parsing empty string faulted");
3211 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3212 fail("Parsing final empty string not faulted");
3213 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
3214 fail("Parsing final empty string faulted for wrong reason");
3215
3216 /* Now try with valid text before the empty end */
3217 XML_ParserReset(g_parser, NULL);
3218 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3219 == XML_STATUS_ERROR)
3220 xml_failure(g_parser);
3221 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
3222 fail("Parsing final empty string faulted");
3223
3224 /* Now try with invalid text before the empty end */
3225 XML_ParserReset(g_parser, NULL);
3226 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
3227 XML_FALSE)
3228 == XML_STATUS_ERROR)
3229 xml_failure(g_parser);
3230 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3231 fail("Parsing final incomplete empty string not faulted");
3232 }
3233 END_TEST
3234
3235 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)3236 START_TEST(test_negative_len_parse) {
3237 const char *const doc = "<root/>";
3238 for (int isFinal = 0; isFinal < 2; isFinal++) {
3239 set_subtest("isFinal=%d", isFinal);
3240
3241 XML_Parser parser = XML_ParserCreate(NULL);
3242
3243 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
3244 fail("There was not supposed to be any initial parse error.");
3245
3246 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
3247
3248 if (status != XML_STATUS_ERROR)
3249 fail("Negative len was expected to fail the parse but did not.");
3250
3251 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3252 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3253
3254 XML_ParserFree(parser);
3255 }
3256 }
3257 END_TEST
3258
3259 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)3260 START_TEST(test_negative_len_parse_buffer) {
3261 const char *const doc = "<root/>";
3262 for (int isFinal = 0; isFinal < 2; isFinal++) {
3263 set_subtest("isFinal=%d", isFinal);
3264
3265 XML_Parser parser = XML_ParserCreate(NULL);
3266
3267 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
3268 fail("There was not supposed to be any initial parse error.");
3269
3270 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
3271
3272 if (buffer == NULL)
3273 fail("XML_GetBuffer failed.");
3274
3275 memcpy(buffer, doc, strlen(doc));
3276
3277 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
3278
3279 if (status != XML_STATUS_ERROR)
3280 fail("Negative len was expected to fail the parse but did not.");
3281
3282 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3283 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3284
3285 XML_ParserFree(parser);
3286 }
3287 }
3288 END_TEST
3289
3290 /* Test odd corners of the XML_GetBuffer interface */
3291 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)3292 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
3293 const XML_Feature *feature = XML_GetFeatureList();
3294
3295 if (feature == NULL)
3296 return XML_STATUS_ERROR;
3297 for (; feature->feature != XML_FEATURE_END; feature++) {
3298 if (feature->feature == feature_id) {
3299 *presult = feature->value;
3300 return XML_STATUS_OK;
3301 }
3302 }
3303 return XML_STATUS_ERROR;
3304 }
3305
3306 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)3307 START_TEST(test_get_buffer_1) {
3308 const char *text = get_buffer_test_text;
3309 void *buffer;
3310 long context_bytes;
3311
3312 /* Attempt to allocate a negative length buffer */
3313 if (XML_GetBuffer(g_parser, -12) != NULL)
3314 fail("Negative length buffer not failed");
3315
3316 /* Now get a small buffer and extend it past valid length */
3317 buffer = XML_GetBuffer(g_parser, 1536);
3318 if (buffer == NULL)
3319 fail("1.5K buffer failed");
3320 assert(buffer != NULL);
3321 memcpy(buffer, text, strlen(text));
3322 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3323 == XML_STATUS_ERROR)
3324 xml_failure(g_parser);
3325 if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
3326 fail("INT_MAX buffer not failed");
3327
3328 /* Now try extending it a more reasonable but still too large
3329 * amount. The allocator in XML_GetBuffer() doubles the buffer
3330 * size until it exceeds the requested amount or INT_MAX. If it
3331 * exceeds INT_MAX, it rejects the request, so we want a request
3332 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
3333 * with an extra byte just to ensure that the request is off any
3334 * boundary. The request will be inflated internally by
3335 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
3336 * request.
3337 */
3338 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
3339 context_bytes = 0;
3340 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
3341 fail("INT_MAX- buffer not failed");
3342
3343 /* Now try extending it a carefully crafted amount */
3344 if (XML_GetBuffer(g_parser, 1000) == NULL)
3345 fail("1000 buffer failed");
3346 }
3347 END_TEST
3348
3349 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)3350 START_TEST(test_get_buffer_2) {
3351 const char *text = get_buffer_test_text;
3352 void *buffer;
3353
3354 /* Now get a decent buffer */
3355 buffer = XML_GetBuffer(g_parser, 1536);
3356 if (buffer == NULL)
3357 fail("1.5K buffer failed");
3358 assert(buffer != NULL);
3359 memcpy(buffer, text, strlen(text));
3360 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3361 == XML_STATUS_ERROR)
3362 xml_failure(g_parser);
3363
3364 /* Extend it, to catch a different code path */
3365 if (XML_GetBuffer(g_parser, 1024) == NULL)
3366 fail("1024 buffer failed");
3367 }
3368 END_TEST
3369
3370 /* Test for signed integer overflow CVE-2022-23852 */
3371 #if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow)3372 START_TEST(test_get_buffer_3_overflow) {
3373 XML_Parser parser = XML_ParserCreate(NULL);
3374 assert(parser != NULL);
3375
3376 const char *const text = "\n";
3377 const int expectedKeepValue = (int)strlen(text);
3378
3379 // After this call, variable "keep" in XML_GetBuffer will
3380 // have value expectedKeepValue
3381 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
3382 XML_FALSE /* isFinal */)
3383 == XML_STATUS_ERROR)
3384 xml_failure(parser);
3385
3386 assert(expectedKeepValue > 0);
3387 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
3388 fail("enlarging buffer not failed");
3389
3390 XML_ParserFree(parser);
3391 }
3392 END_TEST
3393 #endif // XML_CONTEXT_BYTES > 0
3394
/* For each of several already-fed prefixes, check that XML_GetBuffer()
 * still grants a request of (maxbuf - prefix length) bytes and refuses a
 * request of maxbuf + 1 bytes, where maxbuf is INT_MAX / 2 rounded up.
 */
START_TEST(test_buffer_can_grow_to_max) {
  /* Prefixes of increasing length; each is fed to a fresh parser as a
   * non-final chunk before the large buffer requests are made.
   */
  const char *const prefixes[] = {
      "",
      "<",
      "<x a='",
      "<doc><x a='",
      "<document><x a='",
      "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
      "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
      "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
      "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
      "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
  const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
  int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
#if defined(__MINGW32__) && ! defined(__MINGW64__)
  // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
  // Can we make a big allocation?
  // At most two probes: each failed malloc() halves maxbuf.
  for (int i = 1; i <= 2; i++) {
    void *const big = malloc(maxbuf);
    if (big != NULL) {
      free(big);
      break;
    }
    // The big allocation failed. Let's be a little lenient.
    maxbuf = maxbuf / 2;
    fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf);
  }
#endif

  for (int i = 0; i < num_prefixes; ++i) {
    set_subtest("\"%s\"", prefixes[i]);
    XML_Parser parser = XML_ParserCreate(NULL);
#if XML_GE == 1
    // A threshold of (size_t)-1 is used here to switch the allocation
    // tracker off for this parser.
    assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
                == XML_TRUE); // i.e. deactivate
#endif
    const int prefix_len = (int)strlen(prefixes[i]);
    const enum XML_Status s
        = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
    if (s != XML_STATUS_OK)
      xml_failure(parser);

    // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
    // subtracting the whole prefix is easiest, and close enough.
    assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
    // The limit should be consistent; no prefix should allow us to
    // reach above the max buffer size.
    assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
    XML_ParserFree(parser);
  }
}
END_TEST
3447
START_TEST(test_getbuffer_allocates_on_zero_len)3448 START_TEST(test_getbuffer_allocates_on_zero_len) {
3449 for (int first_len = 1; first_len >= 0; first_len--) {
3450 set_subtest("with len=%d first", first_len);
3451 XML_Parser parser = XML_ParserCreate(NULL);
3452 assert_true(parser != NULL);
3453 assert_true(XML_GetBuffer(parser, first_len) != NULL);
3454 assert_true(XML_GetBuffer(parser, 0) != NULL);
3455 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3456 xml_failure(parser);
3457 XML_ParserFree(parser);
3458 }
3459 }
3460 END_TEST
3461
3462 /* Test position information macros */
START_TEST(test_byte_info_at_end)3463 START_TEST(test_byte_info_at_end) {
3464 const char *text = "<doc></doc>";
3465
3466 if (XML_GetCurrentByteIndex(g_parser) != -1
3467 || XML_GetCurrentByteCount(g_parser) != 0)
3468 fail("Byte index/count incorrect at start of parse");
3469 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3470 == XML_STATUS_ERROR)
3471 xml_failure(g_parser);
3472 /* At end, the count will be zero and the index the end of string */
3473 if (XML_GetCurrentByteCount(g_parser) != 0)
3474 fail("Terminal byte count incorrect");
3475 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3476 fail("Terminal byte index incorrect");
3477 }
3478 END_TEST
3479
3480 /* Test position information from errors */
3481 #define PRE_ERROR_STR "<doc></"
3482 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3483 START_TEST(test_byte_info_at_error) {
3484 const char *text = PRE_ERROR_STR POST_ERROR_STR;
3485
3486 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3487 == XML_STATUS_OK)
3488 fail("Syntax error not faulted");
3489 if (XML_GetCurrentByteCount(g_parser) != 0)
3490 fail("Error byte count incorrect");
3491 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3492 fail("Error byte index incorrect");
3493 }
3494 END_TEST
3495 #undef PRE_ERROR_STR
3496 #undef POST_ERROR_STR
3497
3498 /* Test position information in handler */
3499 #define START_ELEMENT "<e>"
3500 #define CDATA_TEXT "Hello"
3501 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3502 START_TEST(test_byte_info_at_cdata) {
3503 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3504 int offset, size;
3505 ByteTestData data;
3506
3507 /* Check initial context is empty */
3508 if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3509 fail("Unexpected context at start of parse");
3510
3511 data.start_element_len = (int)strlen(START_ELEMENT);
3512 data.cdata_len = (int)strlen(CDATA_TEXT);
3513 data.total_string_len = (int)strlen(text);
3514 XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3515 XML_SetUserData(g_parser, &data);
3516 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3517 xml_failure(g_parser);
3518 }
3519 END_TEST
3520 #undef START_ELEMENT
3521 #undef CDATA_TEXT
3522 #undef END_ELEMENT
3523
3524 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3525 START_TEST(test_predefined_entities) {
3526 const char *text = "<doc><>&"'</doc>";
3527 const XML_Char *expected = XCS("<doc><>&"'</doc>");
3528 const XML_Char *result = XCS("<>&\"'");
3529 CharData storage;
3530
3531 XML_SetDefaultHandler(g_parser, accumulate_characters);
3532 /* run_character_check uses XML_SetCharacterDataHandler(), which
3533 * unfortunately heads off a code path that we need to exercise.
3534 */
3535 CharData_Init(&storage);
3536 XML_SetUserData(g_parser, &storage);
3537 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3538 == XML_STATUS_ERROR)
3539 xml_failure(g_parser);
3540 /* The default handler doesn't translate the entities */
3541 CharData_CheckXMLChars(&storage, expected);
3542
3543 /* Now try again and check the translation */
3544 XML_ParserReset(g_parser, NULL);
3545 run_character_check(text, result);
3546 }
3547 END_TEST
3548
3549 /* Regression test that an invalid tag in an external parameter
3550 * reference in an external DTD is correctly faulted.
3551 *
3552 * Only a few specific tags are legal in DTDs ignoring comments and
3553 * processing instructions, all of which begin with an exclamation
3554 * mark. "<el/>" is not one of them, so the parser should raise an
3555 * error on encountering it.
3556 */
START_TEST(test_invalid_tag_in_dtd)3557 START_TEST(test_invalid_tag_in_dtd) {
3558 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3559 "<doc></doc>\n";
3560
3561 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3562 XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3563 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3564 "Invalid tag IN DTD external param not rejected");
3565 }
3566 END_TEST
3567
3568 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3569 START_TEST(test_not_predefined_entities) {
3570 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3571 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3572 int i = 0;
3573
3574 while (text[i] != NULL) {
3575 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3576 "Undefined entity not rejected");
3577 XML_ParserReset(g_parser, NULL);
3578 i++;
3579 }
3580 }
3581 END_TEST
3582
3583 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3584 START_TEST(test_ignore_section) {
3585 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3586 "<doc><e>&entity;</e></doc>";
3587 const XML_Char *expected
3588 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3589 CharData storage;
3590
3591 CharData_Init(&storage);
3592 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3593 XML_SetUserData(g_parser, &storage);
3594 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3595 XML_SetDefaultHandler(g_parser, accumulate_characters);
3596 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3597 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3598 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3599 XML_SetStartElementHandler(g_parser, dummy_start_element);
3600 XML_SetEndElementHandler(g_parser, dummy_end_element);
3601 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3602 == XML_STATUS_ERROR)
3603 xml_failure(g_parser);
3604 CharData_CheckXMLChars(&storage, expected);
3605 }
3606 END_TEST
3607
START_TEST(test_ignore_section_utf16)3608 START_TEST(test_ignore_section_utf16) {
3609 const char text[] =
3610 /* <!DOCTYPE d SYSTEM 's'> */
3611 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3612 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3613 /* <d><e>&en;</e></d> */
3614 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3615 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3616 CharData storage;
3617
3618 CharData_Init(&storage);
3619 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3620 XML_SetUserData(g_parser, &storage);
3621 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3622 XML_SetDefaultHandler(g_parser, accumulate_characters);
3623 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3624 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3625 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3626 XML_SetStartElementHandler(g_parser, dummy_start_element);
3627 XML_SetEndElementHandler(g_parser, dummy_end_element);
3628 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3629 == XML_STATUS_ERROR)
3630 xml_failure(g_parser);
3631 CharData_CheckXMLChars(&storage, expected);
3632 }
3633 END_TEST
3634
START_TEST(test_ignore_section_utf16_be)3635 START_TEST(test_ignore_section_utf16_be) {
3636 const char text[] =
3637 /* <!DOCTYPE d SYSTEM 's'> */
3638 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3639 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3640 /* <d><e>&en;</e></d> */
3641 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3642 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3643 CharData storage;
3644
3645 CharData_Init(&storage);
3646 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3647 XML_SetUserData(g_parser, &storage);
3648 XML_SetExternalEntityRefHandler(g_parser,
3649 external_entity_load_ignore_utf16_be);
3650 XML_SetDefaultHandler(g_parser, accumulate_characters);
3651 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3652 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3653 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3654 XML_SetStartElementHandler(g_parser, dummy_start_element);
3655 XML_SetEndElementHandler(g_parser, dummy_end_element);
3656 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3657 == XML_STATUS_ERROR)
3658 xml_failure(g_parser);
3659 CharData_CheckXMLChars(&storage, expected);
3660 }
3661 END_TEST
3662
3663 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3664 START_TEST(test_bad_ignore_section) {
3665 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3666 "<doc><e>&entity;</e></doc>";
3667 ExtFaults faults[]
3668 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3669 XML_ERROR_SYNTAX},
3670 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3671 XML_ERROR_INVALID_TOKEN},
3672 {/* FIrst two bytes of a three-byte char */
3673 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3674 XML_ERROR_PARTIAL_CHAR},
3675 {NULL, NULL, NULL, XML_ERROR_NONE}};
3676 ExtFaults *fault;
3677
3678 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3679 set_subtest("%s", fault->parse_text);
3680 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3681 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3682 XML_SetUserData(g_parser, fault);
3683 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3684 "Incomplete IGNORE section not failed");
3685 XML_ParserReset(g_parser, NULL);
3686 }
3687 }
3688 END_TEST
3689
/* State shared between test_external_bom_consumed and
 * external_bom_checker through the parser's user data pointer.
 */
struct bom_testdata {
  /* Text parsed when the '004-2.ent' entity is requested. */
  const char *external;
  /* Byte offset at which that text is cut into two parse calls. */
  int split;
  /* Set to XML_TRUE once the '004-2.ent' branch of the handler has run. */
  XML_Bool nested_callback_happened;
};
3695
3696 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3697 external_bom_checker(XML_Parser parser, const XML_Char *context,
3698 const XML_Char *base, const XML_Char *systemId,
3699 const XML_Char *publicId) {
3700 const char *text;
3701 UNUSED_P(base);
3702 UNUSED_P(systemId);
3703 UNUSED_P(publicId);
3704
3705 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3706 if (ext_parser == NULL)
3707 fail("Could not create external entity parser");
3708
3709 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3710 struct bom_testdata *const testdata = XML_GetUserData(parser);
3711 const char *const external = testdata->external;
3712 const int split = testdata->split;
3713 testdata->nested_callback_happened = XML_TRUE;
3714
3715 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3716 != XML_STATUS_OK) {
3717 xml_failure(ext_parser);
3718 }
3719 text = external + split; // the parse below will continue where we left off.
3720 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3721 text = "<!ELEMENT doc EMPTY>\n"
3722 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3723 "<!ENTITY % e2 '%e1;'>\n";
3724 } else {
3725 fail("unknown systemId");
3726 }
3727
3728 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3729 != XML_STATUS_OK)
3730 xml_failure(ext_parser);
3731
3732 XML_ParserFree(ext_parser);
3733 return XML_STATUS_OK;
3734 }
3735
3736 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3737 START_TEST(test_external_bom_consumed) {
3738 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3739 "<doc></doc>\n";
3740 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3741 const int len = (int)strlen(external);
3742 for (int split = 0; split <= len; ++split) {
3743 set_subtest("split at byte %d", split);
3744
3745 struct bom_testdata testdata;
3746 testdata.external = external;
3747 testdata.split = split;
3748 testdata.nested_callback_happened = XML_FALSE;
3749
3750 XML_Parser parser = XML_ParserCreate(NULL);
3751 if (parser == NULL) {
3752 fail("Couldn't create parser");
3753 }
3754 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3755 XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3756 XML_SetUserData(parser, &testdata);
3757 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3758 == XML_STATUS_ERROR)
3759 xml_failure(parser);
3760 if (! testdata.nested_callback_happened) {
3761 fail("ref handler not called");
3762 }
3763 XML_ParserFree(parser);
3764 }
3765 }
3766 END_TEST
3767
3768 /* Test recursive parsing */
START_TEST(test_external_entity_values)3769 START_TEST(test_external_entity_values) {
3770 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3771 "<doc></doc>\n";
3772 ExtFaults data_004_2[] = {
3773 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3774 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3775 XML_ERROR_INVALID_TOKEN},
3776 {"'wombat", "Unterminated string not faulted", NULL,
3777 XML_ERROR_UNCLOSED_TOKEN},
3778 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3779 XML_ERROR_PARTIAL_CHAR},
3780 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3781 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3782 XML_ERROR_XML_DECL},
3783 {/* UTF-8 BOM */
3784 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3785 XML_ERROR_NONE},
3786 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3787 "Invalid token after text declaration not faulted", NULL,
3788 XML_ERROR_INVALID_TOKEN},
3789 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3790 "Unterminated string after text decl not faulted", NULL,
3791 XML_ERROR_UNCLOSED_TOKEN},
3792 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3793 "Partial UTF-8 character after text decl not faulted", NULL,
3794 XML_ERROR_PARTIAL_CHAR},
3795 {"%e1;", "Recursive parameter entity not faulted", NULL,
3796 XML_ERROR_RECURSIVE_ENTITY_REF},
3797 {NULL, NULL, NULL, XML_ERROR_NONE}};
3798 int i;
3799
3800 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3801 set_subtest("%s", data_004_2[i].parse_text);
3802 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3803 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3804 XML_SetUserData(g_parser, &data_004_2[i]);
3805 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3806 == XML_STATUS_ERROR)
3807 xml_failure(g_parser);
3808 XML_ParserReset(g_parser, NULL);
3809 }
3810 }
3811 END_TEST
3812
3813 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3814 START_TEST(test_ext_entity_not_standalone) {
3815 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3816 "<doc></doc>";
3817
3818 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3819 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3820 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3821 "Standalone rejection not caught");
3822 }
3823 END_TEST
3824
START_TEST(test_ext_entity_value_abort)3825 START_TEST(test_ext_entity_value_abort) {
3826 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3827 "<doc></doc>\n";
3828
3829 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3830 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3831 g_resumable = XML_FALSE;
3832 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3833 == XML_STATUS_ERROR)
3834 xml_failure(g_parser);
3835 }
3836 END_TEST
3837
START_TEST(test_bad_public_doctype)3838 START_TEST(test_bad_public_doctype) {
3839 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3840 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3841 "<doc></doc>";
3842
3843 /* Setting a handler provokes a particular code path */
3844 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3845 dummy_end_doctype_handler);
3846 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3847 }
3848 END_TEST
3849
3850 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3851 START_TEST(test_attribute_enum_value) {
3852 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3853 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3854 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3855 ExtTest dtd_data
3856 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3857 "<!ELEMENT a EMPTY>\n"
3858 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3859 NULL, NULL};
3860 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3861
3862 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3863 XML_SetUserData(g_parser, &dtd_data);
3864 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3865 /* An attribute list handler provokes a different code path */
3866 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3867 run_ext_character_check(text, &dtd_data, expected);
3868 }
3869 END_TEST
3870
3871 /* Slightly bizarrely, the library seems to silently ignore entity
3872 * definitions for predefined entities, even when they are wrong. The
3873 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3874 * to happen, so this is currently treated as acceptable.
3875 */
START_TEST(test_predefined_entity_redefinition)3876 START_TEST(test_predefined_entity_redefinition) {
3877 const char *text = "<!DOCTYPE doc [\n"
3878 "<!ENTITY apos 'foo'>\n"
3879 "]>\n"
3880 "<doc>'</doc>";
3881 run_character_check(text, XCS("'"));
3882 }
3883 END_TEST
3884
3885 /* Test that the parser stops processing the DTD after an unresolved
3886 * parameter entity is encountered.
3887 */
START_TEST(test_dtd_stop_processing)3888 START_TEST(test_dtd_stop_processing) {
3889 const char *text = "<!DOCTYPE doc [\n"
3890 "%foo;\n"
3891 "<!ENTITY bar 'bas'>\n"
3892 "]><doc/>";
3893
3894 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3895 init_dummy_handlers();
3896 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3897 == XML_STATUS_ERROR)
3898 xml_failure(g_parser);
3899 if (get_dummy_handler_flags() != 0)
3900 fail("DTD processing still going after undefined PE");
3901 }
3902 END_TEST
3903
3904 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3905 START_TEST(test_public_notation_no_sysid) {
3906 const char *text = "<!DOCTYPE doc [\n"
3907 "<!NOTATION note PUBLIC 'foo'>\n"
3908 "<!ELEMENT doc EMPTY>\n"
3909 "]>\n<doc/>";
3910
3911 init_dummy_handlers();
3912 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3913 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3914 == XML_STATUS_ERROR)
3915 xml_failure(g_parser);
3916 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3917 fail("Notation declaration handler not called");
3918 }
3919 END_TEST
3920
START_TEST(test_nested_groups)3921 START_TEST(test_nested_groups) {
3922 const char *text
3923 = "<!DOCTYPE doc [\n"
3924 "<!ELEMENT doc "
3925 /* Sixteen elements per line */
3926 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3927 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3928 "))))))))))))))))))))))))))))))))>\n"
3929 "<!ELEMENT e EMPTY>"
3930 "]>\n"
3931 "<doc><e/></doc>";
3932 CharData storage;
3933
3934 CharData_Init(&storage);
3935 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3936 XML_SetStartElementHandler(g_parser, record_element_start_handler);
3937 XML_SetUserData(g_parser, &storage);
3938 init_dummy_handlers();
3939 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3940 == XML_STATUS_ERROR)
3941 xml_failure(g_parser);
3942 CharData_CheckXMLChars(&storage, XCS("doce"));
3943 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3944 fail("Element handler not fired");
3945 }
3946 END_TEST
3947
START_TEST(test_group_choice)3948 START_TEST(test_group_choice) {
3949 const char *text = "<!DOCTYPE doc [\n"
3950 "<!ELEMENT doc (a|b|c)+>\n"
3951 "<!ELEMENT a EMPTY>\n"
3952 "<!ELEMENT b (#PCDATA)>\n"
3953 "<!ELEMENT c ANY>\n"
3954 "]>\n"
3955 "<doc>\n"
3956 "<a/>\n"
3957 "<b attr='foo'>This is a foo</b>\n"
3958 "<c></c>\n"
3959 "</doc>\n";
3960
3961 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3962 init_dummy_handlers();
3963 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3964 == XML_STATUS_ERROR)
3965 xml_failure(g_parser);
3966 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3967 fail("Element handler flag not raised");
3968 }
3969 END_TEST
3970
START_TEST(test_standalone_parameter_entity)3971 START_TEST(test_standalone_parameter_entity) {
3972 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3973 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3974 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3975 "%entity;\n"
3976 "]>\n"
3977 "<doc></doc>";
3978 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3979
3980 XML_SetUserData(g_parser, dtd_data);
3981 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3982 XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3983 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3984 == XML_STATUS_ERROR)
3985 xml_failure(g_parser);
3986 }
3987 END_TEST
3988
3989 /* Test skipping of parameter entity in an external DTD */
3990 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3991 START_TEST(test_skipped_parameter_entity) {
3992 const char *text = "<?xml version='1.0'?>\n"
3993 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3994 "<!ELEMENT root (#PCDATA|a)* >\n"
3995 "]>\n"
3996 "<root></root>";
3997 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3998
3999 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4000 XML_SetUserData(g_parser, &dtd_data);
4001 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4002 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
4003 init_dummy_handlers();
4004 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4005 == XML_STATUS_ERROR)
4006 xml_failure(g_parser);
4007 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
4008 fail("Skip handler not executed");
4009 }
4010 END_TEST
4011
4012 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)4013 START_TEST(test_recursive_external_parameter_entity) {
4014 const char *text = "<?xml version='1.0'?>\n"
4015 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
4016 "<!ELEMENT root (#PCDATA|a)* >\n"
4017 "]>\n"
4018 "<root></root>";
4019 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;",
4020 "Recursive external parameter entity not faulted", NULL,
4021 XML_ERROR_RECURSIVE_ENTITY_REF};
4022
4023 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4024 XML_SetUserData(g_parser, &dtd_data);
4025 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4026 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4027 "Recursive external parameter not spotted");
4028 }
4029 END_TEST
4030
4031 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)4032 START_TEST(test_undefined_ext_entity_in_external_dtd) {
4033 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
4034 "<doc></doc>\n";
4035
4036 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4037 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
4038 XML_SetUserData(g_parser, NULL);
4039 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4040 == XML_STATUS_ERROR)
4041 xml_failure(g_parser);
4042
4043 /* Now repeat without the external entity ref handler invoking
4044 * another copy of itself.
4045 */
4046 XML_ParserReset(g_parser, NULL);
4047 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4048 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
4049 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
4050 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4051 == XML_STATUS_ERROR)
4052 xml_failure(g_parser);
4053 }
4054 END_TEST
4055
4056 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)4057 START_TEST(test_suspend_xdecl) {
4058 const char *text = long_character_data_text;
4059
4060 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
4061 XML_SetUserData(g_parser, g_parser);
4062 g_resumable = XML_TRUE;
4063 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4064 // we won't know exactly how much input we actually managed to give Expat.
4065 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4066 != XML_STATUS_SUSPENDED)
4067 xml_failure(g_parser);
4068 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
4069 xml_failure(g_parser);
4070 /* Attempt to start a new parse while suspended */
4071 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4072 != XML_STATUS_ERROR)
4073 fail("Attempt to parse while suspended not faulted");
4074 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
4075 fail("Suspended parse not faulted with correct error");
4076 }
4077 END_TEST
4078
4079 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)4080 START_TEST(test_abort_epilog) {
4081 const char *text = "<doc></doc>\n\r\n";
4082 XML_Char trigger_char = XCS('\r');
4083
4084 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4085 XML_SetUserData(g_parser, &trigger_char);
4086 g_resumable = XML_FALSE;
4087 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4088 != XML_STATUS_ERROR)
4089 fail("Abort not triggered");
4090 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
4091 xml_failure(g_parser);
4092 }
4093 END_TEST
4094
4095 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)4096 START_TEST(test_abort_epilog_2) {
4097 const char *text = "<doc></doc>\n";
4098 XML_Char trigger_char = XCS('\n');
4099
4100 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4101 XML_SetUserData(g_parser, &trigger_char);
4102 g_resumable = XML_FALSE;
4103 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
4104 }
4105 END_TEST
4106
4107 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)4108 START_TEST(test_suspend_epilog) {
4109 const char *text = "<doc></doc>\n";
4110 XML_Char trigger_char = XCS('\n');
4111
4112 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4113 XML_SetUserData(g_parser, &trigger_char);
4114 g_resumable = XML_TRUE;
4115 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4116 != XML_STATUS_SUSPENDED)
4117 xml_failure(g_parser);
4118 }
4119 END_TEST
4120
START_TEST(test_suspend_in_sole_empty_tag)4121 START_TEST(test_suspend_in_sole_empty_tag) {
4122 const char *text = "<doc/>";
4123 enum XML_Status rc;
4124
4125 XML_SetEndElementHandler(g_parser, suspending_end_handler);
4126 XML_SetUserData(g_parser, g_parser);
4127 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
4128 if (rc == XML_STATUS_ERROR)
4129 xml_failure(g_parser);
4130 else if (rc != XML_STATUS_SUSPENDED)
4131 fail("Suspend not triggered");
4132 rc = XML_ResumeParser(g_parser);
4133 if (rc == XML_STATUS_ERROR)
4134 xml_failure(g_parser);
4135 else if (rc != XML_STATUS_OK)
4136 fail("Resume failed");
4137 }
4138 END_TEST
4139
START_TEST(test_unfinished_epilog)4140 START_TEST(test_unfinished_epilog) {
4141 const char *text = "<doc></doc><";
4142
4143 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
4144 "Incomplete epilog entry not faulted");
4145 }
4146 END_TEST
4147
START_TEST(test_partial_char_in_epilog)4148 START_TEST(test_partial_char_in_epilog) {
4149 const char *text = "<doc></doc>\xe2\x82";
4150
4151 /* First check that no fault is raised if the parse is not finished */
4152 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
4153 == XML_STATUS_ERROR)
4154 xml_failure(g_parser);
4155 /* Now check that it is faulted once we finish */
4156 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
4157 fail("Partial character in epilog not faulted");
4158 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
4159 xml_failure(g_parser);
4160 }
4161 END_TEST
4162
4163 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)4164 START_TEST(test_suspend_resume_internal_entity) {
4165 const char *text
4166 = "<!DOCTYPE doc [\n"
4167 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
4168 "]>\n"
4169 "<doc>&foo;</doc>\n";
4170 const XML_Char *expected1 = XCS("Hi");
4171 const XML_Char *expected2 = XCS("HiHo");
4172 CharData storage;
4173
4174 CharData_Init(&storage);
4175 XML_SetStartElementHandler(g_parser, start_element_suspender);
4176 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4177 XML_SetUserData(g_parser, &storage);
4178 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4179 // we won't know exactly how much input we actually managed to give Expat.
4180 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4181 != XML_STATUS_SUSPENDED)
4182 xml_failure(g_parser);
4183 CharData_CheckXMLChars(&storage, XCS(""));
4184 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
4185 xml_failure(g_parser);
4186 CharData_CheckXMLChars(&storage, expected1);
4187 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4188 xml_failure(g_parser);
4189 CharData_CheckXMLChars(&storage, expected2);
4190 }
4191 END_TEST
4192
START_TEST(test_suspend_resume_internal_entity_issue_629)4193 START_TEST(test_suspend_resume_internal_entity_issue_629) {
4194 const char *const text
4195 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
4196 "<"
4197 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4198 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4199 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4200 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4201 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4202 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4203 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4204 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4205 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4206 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4207 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4208 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4209 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4210 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4211 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4212 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4213 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4214 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4215 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4216 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4217 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4218 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4219 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4220 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4221 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4222 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4223 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4224 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4225 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4226 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4227 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4228 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4229 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4230 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4231 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4232 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4233 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4234 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4235 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4236 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4237 "/>"
4238 "</b></a>";
4239 const size_t firstChunkSizeBytes = 54;
4240
4241 XML_Parser parser = XML_ParserCreate(NULL);
4242 XML_SetUserData(parser, parser);
4243 XML_SetCommentHandler(parser, suspending_comment_handler);
4244
4245 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
4246 != XML_STATUS_SUSPENDED)
4247 xml_failure(parser);
4248 if (XML_ResumeParser(parser) != XML_STATUS_OK)
4249 xml_failure(parser);
4250 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
4251 (int)(strlen(text) - firstChunkSizeBytes),
4252 XML_TRUE)
4253 != XML_STATUS_OK)
4254 xml_failure(parser);
4255 XML_ParserFree(parser);
4256 }
4257 END_TEST
4258
4259 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)4260 START_TEST(test_resume_entity_with_syntax_error) {
4261 if (g_chunkSize != 0) {
4262 // this test does not use SINGLE_BYTES, because of suspension
4263 return;
4264 }
4265
4266 const char *text = "<!DOCTYPE doc [\n"
4267 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
4268 "]>\n"
4269 "<doc>&foo;</doc>\n";
4270
4271 XML_SetStartElementHandler(g_parser, start_element_suspender);
4272 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4273 // we won't know exactly how much input we actually managed to give Expat.
4274 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4275 != XML_STATUS_SUSPENDED)
4276 xml_failure(g_parser);
4277 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
4278 fail("Syntax error in entity not faulted");
4279 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
4280 xml_failure(g_parser);
4281 }
4282 END_TEST
4283
4284 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)4285 START_TEST(test_suspend_resume_parameter_entity) {
4286 const char *text = "<!DOCTYPE doc [\n"
4287 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
4288 "%foo;\n"
4289 "]>\n"
4290 "<doc>Hello, world</doc>";
4291 const XML_Char *expected = XCS("Hello, world");
4292 CharData storage;
4293
4294 CharData_Init(&storage);
4295 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4296 XML_SetElementDeclHandler(g_parser, element_decl_suspender);
4297 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4298 XML_SetUserData(g_parser, &storage);
4299 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4300 != XML_STATUS_SUSPENDED)
4301 xml_failure(g_parser);
4302 CharData_CheckXMLChars(&storage, XCS(""));
4303 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4304 xml_failure(g_parser);
4305 CharData_CheckXMLChars(&storage, expected);
4306 }
4307 END_TEST
4308
4309 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)4310 START_TEST(test_restart_on_error) {
4311 const char *text = "<$doc><doc></doc>";
4312
4313 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4314 != XML_STATUS_ERROR)
4315 fail("Invalid tag name not faulted");
4316 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4317 xml_failure(g_parser);
4318 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
4319 fail("Restarting invalid parse not faulted");
4320 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4321 xml_failure(g_parser);
4322 }
4323 END_TEST
4324
4325 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)4326 START_TEST(test_reject_lt_in_attribute_value) {
4327 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
4328 "<doc></doc>";
4329
4330 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4331 "Bad attribute default not faulted");
4332 }
4333 END_TEST
4334
START_TEST(test_reject_unfinished_param_in_att_value)4335 START_TEST(test_reject_unfinished_param_in_att_value) {
4336 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
4337 "<doc></doc>";
4338
4339 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4340 "Bad attribute default not faulted");
4341 }
4342 END_TEST
4343
START_TEST(test_trailing_cr_in_att_value)4344 START_TEST(test_trailing_cr_in_att_value) {
4345 const char *text = "<doc a='value\r'/>";
4346
4347 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4348 == XML_STATUS_ERROR)
4349 xml_failure(g_parser);
4350 }
4351 END_TEST
4352
4353 /* Try parsing a general entity within a parameter entity in a
4354 * standalone internal DTD. Covers a corner case in the parser.
4355 */
START_TEST(test_standalone_internal_entity)4356 START_TEST(test_standalone_internal_entity) {
4357 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
4358 "<!DOCTYPE doc [\n"
4359 " <!ELEMENT doc (#PCDATA)>\n"
4360 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n"
4361 " <!ENTITY ge 'AttDefaultValue'>\n"
4362 " %pe;\n"
4363 "]>\n"
4364 "<doc att2='any'/>";
4365
4366 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4367 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4368 == XML_STATUS_ERROR)
4369 xml_failure(g_parser);
4370 }
4371 END_TEST
4372
4373 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)4374 START_TEST(test_skipped_external_entity) {
4375 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4376 "<doc></doc>\n";
4377 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
4378 "<!ENTITY % e2 '%e1;'>\n",
4379 NULL, NULL};
4380
4381 XML_SetUserData(g_parser, &test_data);
4382 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4383 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4384 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4385 == XML_STATUS_ERROR)
4386 xml_failure(g_parser);
4387 }
4388 END_TEST
4389
4390 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)4391 START_TEST(test_skipped_null_loaded_ext_entity) {
4392 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4393 "<doc />";
4394 ExtHdlrData test_data
4395 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4396 "<!ENTITY % pe2 '%pe1;'>\n"
4397 "%pe2;\n",
4398 external_entity_null_loader, NULL};
4399
4400 XML_SetUserData(g_parser, &test_data);
4401 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4402 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4403 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4404 == XML_STATUS_ERROR)
4405 xml_failure(g_parser);
4406 }
4407 END_TEST
4408
START_TEST(test_skipped_unloaded_ext_entity)4409 START_TEST(test_skipped_unloaded_ext_entity) {
4410 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4411 "<doc />";
4412 ExtHdlrData test_data
4413 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4414 "<!ENTITY % pe2 '%pe1;'>\n"
4415 "%pe2;\n",
4416 NULL, NULL};
4417
4418 XML_SetUserData(g_parser, &test_data);
4419 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4420 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4421 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4422 == XML_STATUS_ERROR)
4423 xml_failure(g_parser);
4424 }
4425 END_TEST
4426
4427 /* Test that a parameter entity value ending with a carriage return
4428 * has it translated internally into a newline.
4429 */
START_TEST(test_param_entity_with_trailing_cr)4430 START_TEST(test_param_entity_with_trailing_cr) {
4431 #define PARAM_ENTITY_NAME "pe"
4432 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
4433 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4434 "<doc/>";
4435 ExtTest test_data
4436 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4437 "%" PARAM_ENTITY_NAME ";\n",
4438 NULL, NULL};
4439
4440 XML_SetUserData(g_parser, &test_data);
4441 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4442 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4443 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4444 param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4445 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4446 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4447 == XML_STATUS_ERROR)
4448 xml_failure(g_parser);
4449 int entity_match_flag = get_param_entity_match_flag();
4450 if (entity_match_flag == ENTITY_MATCH_FAIL)
4451 fail("Parameter entity CR->NEWLINE conversion failed");
4452 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4453 fail("Parameter entity not parsed");
4454 }
4455 #undef PARAM_ENTITY_NAME
4456 #undef PARAM_ENTITY_CORE_VALUE
4457 END_TEST
4458
START_TEST(test_invalid_character_entity)4459 START_TEST(test_invalid_character_entity) {
4460 const char *text = "<!DOCTYPE doc [\n"
4461 " <!ENTITY entity '�'>\n"
4462 "]>\n"
4463 "<doc>&entity;</doc>";
4464
4465 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4466 "Out of range character reference not faulted");
4467 }
4468 END_TEST
4469
START_TEST(test_invalid_character_entity_2)4470 START_TEST(test_invalid_character_entity_2) {
4471 const char *text = "<!DOCTYPE doc [\n"
4472 " <!ENTITY entity '&#xg0;'>\n"
4473 "]>\n"
4474 "<doc>&entity;</doc>";
4475
4476 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4477 "Out of range character reference not faulted");
4478 }
4479 END_TEST
4480
START_TEST(test_invalid_character_entity_3)4481 START_TEST(test_invalid_character_entity_3) {
4482 const char text[] =
4483 /* <!DOCTYPE doc [\n */
4484 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4485 /* U+0E04 = KHO KHWAI
4486 * U+0E08 = CHO CHAN */
4487 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4488 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4489 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4490 /* ]>\n */
4491 "\0]\0>\0\n"
4492 /* <doc>&entity;</doc> */
4493 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4494
4495 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4496 != XML_STATUS_ERROR)
4497 fail("Invalid start of entity name not faulted");
4498 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4499 xml_failure(g_parser);
4500 }
4501 END_TEST
4502
START_TEST(test_invalid_character_entity_4)4503 START_TEST(test_invalid_character_entity_4) {
4504 const char *text = "<!DOCTYPE doc [\n"
4505 " <!ENTITY entity '�'>\n" /* = � */
4506 "]>\n"
4507 "<doc>&entity;</doc>";
4508
4509 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4510 "Out of range character reference not faulted");
4511 }
4512 END_TEST
4513
4514 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4515 START_TEST(test_pi_handled_in_default) {
4516 const char *text = "<?test processing instruction?>\n<doc/>";
4517 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4518 CharData storage;
4519
4520 CharData_Init(&storage);
4521 XML_SetDefaultHandler(g_parser, accumulate_characters);
4522 XML_SetUserData(g_parser, &storage);
4523 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4524 == XML_STATUS_ERROR)
4525 xml_failure(g_parser);
4526 CharData_CheckXMLChars(&storage, expected);
4527 }
4528 END_TEST
4529
4530 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4531 START_TEST(test_comment_handled_in_default) {
4532 const char *text = "<!-- This is a comment -->\n<doc/>";
4533 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4534 CharData storage;
4535
4536 CharData_Init(&storage);
4537 XML_SetDefaultHandler(g_parser, accumulate_characters);
4538 XML_SetUserData(g_parser, &storage);
4539 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4540 == XML_STATUS_ERROR)
4541 xml_failure(g_parser);
4542 CharData_CheckXMLChars(&storage, expected);
4543 }
4544 END_TEST
4545
4546 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4547 START_TEST(test_pi_yml) {
4548 const char *text = "<?yml something like data?><doc/>";
4549 const XML_Char *expected = XCS("yml: something like data\n");
4550 CharData storage;
4551
4552 CharData_Init(&storage);
4553 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4554 XML_SetUserData(g_parser, &storage);
4555 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4556 == XML_STATUS_ERROR)
4557 xml_failure(g_parser);
4558 CharData_CheckXMLChars(&storage, expected);
4559 }
4560 END_TEST
4561
START_TEST(test_pi_xnl)4562 START_TEST(test_pi_xnl) {
4563 const char *text = "<?xnl nothing like data?><doc/>";
4564 const XML_Char *expected = XCS("xnl: nothing like data\n");
4565 CharData storage;
4566
4567 CharData_Init(&storage);
4568 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4569 XML_SetUserData(g_parser, &storage);
4570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4571 == XML_STATUS_ERROR)
4572 xml_failure(g_parser);
4573 CharData_CheckXMLChars(&storage, expected);
4574 }
4575 END_TEST
4576
START_TEST(test_pi_xmm)4577 START_TEST(test_pi_xmm) {
4578 const char *text = "<?xmm everything like data?><doc/>";
4579 const XML_Char *expected = XCS("xmm: everything like data\n");
4580 CharData storage;
4581
4582 CharData_Init(&storage);
4583 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4584 XML_SetUserData(g_parser, &storage);
4585 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4586 == XML_STATUS_ERROR)
4587 xml_failure(g_parser);
4588 CharData_CheckXMLChars(&storage, expected);
4589 }
4590 END_TEST
4591
START_TEST(test_utf16_pi)4592 START_TEST(test_utf16_pi) {
4593 const char text[] =
4594 /* <?{KHO KHWAI}{CHO CHAN}?>
4595 * where {KHO KHWAI} = U+0E04
4596 * and {CHO CHAN} = U+0E08
4597 */
4598 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4599 /* <q/> */
4600 "<\0q\0/\0>\0";
4601 #ifdef XML_UNICODE
4602 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4603 #else
4604 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4605 #endif
4606 CharData storage;
4607
4608 CharData_Init(&storage);
4609 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4610 XML_SetUserData(g_parser, &storage);
4611 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4612 == XML_STATUS_ERROR)
4613 xml_failure(g_parser);
4614 CharData_CheckXMLChars(&storage, expected);
4615 }
4616 END_TEST
4617
START_TEST(test_utf16_be_pi)4618 START_TEST(test_utf16_be_pi) {
4619 const char text[] =
4620 /* <?{KHO KHWAI}{CHO CHAN}?>
4621 * where {KHO KHWAI} = U+0E04
4622 * and {CHO CHAN} = U+0E08
4623 */
4624 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4625 /* <q/> */
4626 "\0<\0q\0/\0>";
4627 #ifdef XML_UNICODE
4628 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4629 #else
4630 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4631 #endif
4632 CharData storage;
4633
4634 CharData_Init(&storage);
4635 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4636 XML_SetUserData(g_parser, &storage);
4637 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4638 == XML_STATUS_ERROR)
4639 xml_failure(g_parser);
4640 CharData_CheckXMLChars(&storage, expected);
4641 }
4642 END_TEST
4643
4644 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4645 START_TEST(test_utf16_be_comment) {
4646 const char text[] =
4647 /* <!-- Comment A --> */
4648 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4649 /* <doc/> */
4650 "\0<\0d\0o\0c\0/\0>";
4651 const XML_Char *expected = XCS(" Comment A ");
4652 CharData storage;
4653
4654 CharData_Init(&storage);
4655 XML_SetCommentHandler(g_parser, accumulate_comment);
4656 XML_SetUserData(g_parser, &storage);
4657 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4658 == XML_STATUS_ERROR)
4659 xml_failure(g_parser);
4660 CharData_CheckXMLChars(&storage, expected);
4661 }
4662 END_TEST
4663
START_TEST(test_utf16_le_comment)4664 START_TEST(test_utf16_le_comment) {
4665 const char text[] =
4666 /* <!-- Comment B --> */
4667 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4668 /* <doc/> */
4669 "<\0d\0o\0c\0/\0>\0";
4670 const XML_Char *expected = XCS(" Comment B ");
4671 CharData storage;
4672
4673 CharData_Init(&storage);
4674 XML_SetCommentHandler(g_parser, accumulate_comment);
4675 XML_SetUserData(g_parser, &storage);
4676 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4677 == XML_STATUS_ERROR)
4678 xml_failure(g_parser);
4679 CharData_CheckXMLChars(&storage, expected);
4680 }
4681 END_TEST
4682
4683 /* Test that the unknown encoding handler with map entries that expect
4684 * conversion but no conversion function is faulted
4685 */
START_TEST(test_missing_encoding_conversion_fn)4686 START_TEST(test_missing_encoding_conversion_fn) {
4687 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4688 "<doc>\x81</doc>";
4689
4690 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4691 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4692 * character introducing a two-byte sequence. For this, it
4693 * requires a convert function. The above function call doesn't
4694 * pass one through, so when BadEncodingHandler actually gets
4695 * called it should supply an invalid encoding.
4696 */
4697 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4698 "Encoding with missing convert() not faulted");
4699 }
4700 END_TEST
4701
START_TEST(test_failing_encoding_conversion_fn)4702 START_TEST(test_failing_encoding_conversion_fn) {
4703 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4704 "<doc>\x81</doc>";
4705
4706 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4707 /* BadEncodingHandler sets up an encoding with every top-bit-set
4708 * character introducing a two-byte sequence. For this, it
4709 * requires a convert function. The above function call passes
4710 * one that insists all possible sequences are invalid anyway.
4711 */
4712 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4713 "Encoding with failing convert() not faulted");
4714 }
4715 END_TEST
4716
4717 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4718 START_TEST(test_unknown_encoding_success) {
4719 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4720 /* Equivalent to <eoc>Hello, world</eoc> */
4721 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4722
4723 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4724 run_character_check(text, XCS("Hello, world"));
4725 }
4726 END_TEST
4727
4728 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4729 START_TEST(test_unknown_encoding_bad_name) {
4730 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4731 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4732
4733 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4734 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4735 "Bad name start in unknown encoding not faulted");
4736 }
4737 END_TEST
4738
4739 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4740 START_TEST(test_unknown_encoding_bad_name_2) {
4741 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4742 "<d\xffoc>Hello, world</d\xffoc>";
4743
4744 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4745 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4746 "Bad name in unknown encoding not faulted");
4747 }
4748 END_TEST
4749
4750 /* Test element name that is long enough to fill the conversion buffer
4751 * in an unknown encoding, finishing with an encoded character.
4752 */
START_TEST(test_unknown_encoding_long_name_1)4753 START_TEST(test_unknown_encoding_long_name_1) {
4754 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4755 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4756 "Hi"
4757 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4758 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4759 CharData storage;
4760
4761 CharData_Init(&storage);
4762 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4763 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4764 XML_SetUserData(g_parser, &storage);
4765 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4766 == XML_STATUS_ERROR)
4767 xml_failure(g_parser);
4768 CharData_CheckXMLChars(&storage, expected);
4769 }
4770 END_TEST
4771
4772 /* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
4774 */
START_TEST(test_unknown_encoding_long_name_2)4775 START_TEST(test_unknown_encoding_long_name_2) {
4776 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4777 "<abcdefghabcdefghabcdefghijklmnop>"
4778 "Hi"
4779 "</abcdefghabcdefghabcdefghijklmnop>";
4780 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4781 CharData storage;
4782
4783 CharData_Init(&storage);
4784 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4785 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4786 XML_SetUserData(g_parser, &storage);
4787 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4788 == XML_STATUS_ERROR)
4789 xml_failure(g_parser);
4790 CharData_CheckXMLChars(&storage, expected);
4791 }
4792 END_TEST
4793
START_TEST(test_invalid_unknown_encoding)4794 START_TEST(test_invalid_unknown_encoding) {
4795 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4796 "<doc>Hello world</doc>";
4797
4798 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4799 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4800 "Invalid unknown encoding not faulted");
4801 }
4802 END_TEST
4803
START_TEST(test_unknown_ascii_encoding_ok)4804 START_TEST(test_unknown_ascii_encoding_ok) {
4805 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4806 "<doc>Hello, world</doc>";
4807
4808 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4809 run_character_check(text, XCS("Hello, world"));
4810 }
4811 END_TEST
4812
START_TEST(test_unknown_ascii_encoding_fail)4813 START_TEST(test_unknown_ascii_encoding_fail) {
4814 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4815 "<doc>Hello, \x80 world</doc>";
4816
4817 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4818 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4819 "Invalid character not faulted");
4820 }
4821 END_TEST
4822
START_TEST(test_unknown_encoding_invalid_length)4823 START_TEST(test_unknown_encoding_invalid_length) {
4824 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4825 "<doc>Hello, world</doc>";
4826
4827 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4828 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4829 "Invalid unknown encoding not faulted");
4830 }
4831 END_TEST
4832
START_TEST(test_unknown_encoding_invalid_topbit)4833 START_TEST(test_unknown_encoding_invalid_topbit) {
4834 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4835 "<doc>Hello, world</doc>";
4836
4837 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4838 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4839 "Invalid unknown encoding not faulted");
4840 }
4841 END_TEST
4842
START_TEST(test_unknown_encoding_invalid_surrogate)4843 START_TEST(test_unknown_encoding_invalid_surrogate) {
4844 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4845 "<doc>Hello, \x82 world</doc>";
4846
4847 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4848 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4849 "Invalid unknown encoding not faulted");
4850 }
4851 END_TEST
4852
START_TEST(test_unknown_encoding_invalid_high)4853 START_TEST(test_unknown_encoding_invalid_high) {
4854 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4855 "<doc>Hello, world</doc>";
4856
4857 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4858 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4859 "Invalid unknown encoding not faulted");
4860 }
4861 END_TEST
4862
START_TEST(test_unknown_encoding_invalid_attr_value)4863 START_TEST(test_unknown_encoding_invalid_attr_value) {
4864 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4865 "<doc attr='\xff\x30'/>";
4866
4867 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4868 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4869 "Invalid attribute valid not faulted");
4870 }
4871 END_TEST
4872
/* Test parsing a document with an unknown encoding ('x-unk') on a
 * freshly created parser whose unknown-encoding handler was registered
 * with the data pointer 0xC0FFEE.  The parse must succeed.
 */
START_TEST(test_unknown_encoding_user_data_primary) {
  // This test is based on ideas contributed by Artiphishell Inc.
  const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n"
                           "<root />\n";
  /* Uses its own parser rather than the shared g_parser */
  XML_Parser parser = XML_ParserCreate(NULL);
  /* NOTE(review): the handler presumably checks that it is handed
   * 0xC0FFEE as its data pointer -- confirm against its definition.
   */
  XML_SetUnknownEncodingHandler(parser,
                                user_data_checking_unknown_encoding_handler,
                                (void *)(intptr_t)0xC0FFEE);

  assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
              == XML_STATUS_OK);

  XML_ParserFree(parser);
}
END_TEST
4888
/* Test parsing a document whose external entity declares an unknown
 * encoding ('x-unk'), on a freshly created parser whose
 * unknown-encoding handler was registered with the data pointer
 * 0xC0FFEE.  The parse of the main document must succeed.
 */
START_TEST(test_unknown_encoding_user_data_secondary) {
  // This test is based on ideas contributed by Artiphishell Inc.
  const char *const text_main = "<!DOCTYPE r [\n"
                                " <!ENTITY ext SYSTEM 'ext.ent'>\n"
                                "]>\n"
                                "<r>&ext;</r>\n";
  /* Content served for the external entity; only this text carries
   * the unknown encoding declaration.
   */
  const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n"
                                    "<e>data</e>";
  ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL};
  /* Uses its own parser rather than the shared g_parser */
  XML_Parser parser = XML_ParserCreate(NULL);
  XML_SetExternalEntityRefHandler(parser, external_entity_loader2);
  /* NOTE(review): the handler presumably checks that it is handed
   * 0xC0FFEE as its data pointer -- confirm against its definition.
   */
  XML_SetUnknownEncodingHandler(parser,
                                user_data_checking_unknown_encoding_handler,
                                (void *)(intptr_t)0xC0FFEE);
  /* The parser's user data is the ExtTest2 record, which is distinct
   * from the pointer registered with the unknown-encoding handler.
   */
  XML_SetUserData(parser, &test_data);

  assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main),
                                      XML_TRUE)
              == XML_STATUS_OK);

  XML_ParserFree(parser);
}
END_TEST
4912
4913 /* Test an external entity parser set to use latin-1 detects UTF-16
4914 * BOMs correctly.
4915 */
4916 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4917 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4918 const char *text = "<!DOCTYPE doc [\n"
4919 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4920 "]>\n"
4921 "<doc>&en;</doc>";
4922 ExtTest2 test_data
4923 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4924 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4925 * 0x4c = L and 0x20 is a space
4926 */
4927 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4928 #ifdef XML_UNICODE
4929 const XML_Char *expected = XCS("\x00ff\x00feL ");
4930 #else
4931 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4932 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4933 #endif
4934 CharData storage;
4935
4936 CharData_Init(&storage);
4937 test_data.storage = &storage;
4938 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4939 XML_SetUserData(g_parser, &test_data);
4940 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4941 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4942 == XML_STATUS_ERROR)
4943 xml_failure(g_parser);
4944 CharData_CheckXMLChars(&storage, expected);
4945 }
4946 END_TEST
4947
START_TEST(test_ext_entity_latin1_utf16be_bom)4948 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4949 const char *text = "<!DOCTYPE doc [\n"
4950 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4951 "]>\n"
4952 "<doc>&en;</doc>";
4953 ExtTest2 test_data
4954 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4955 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4956 * 0x4c = L and 0x20 is a space
4957 */
4958 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4959 #ifdef XML_UNICODE
4960 const XML_Char *expected = XCS("\x00fe\x00ff L");
4961 #else
4962 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4963 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4964 #endif
4965 CharData storage;
4966
4967 CharData_Init(&storage);
4968 test_data.storage = &storage;
4969 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4970 XML_SetUserData(g_parser, &test_data);
4971 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4972 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4973 == XML_STATUS_ERROR)
4974 xml_failure(g_parser);
4975 CharData_CheckXMLChars(&storage, expected);
4976 }
4977 END_TEST
4978
4979 /* Parsing the full buffer rather than a byte at a time makes a
4980 * difference to the encoding scanning code, so repeat the above tests
4981 * without breaking them down by byte.
4982 */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4983 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4984 const char *text = "<!DOCTYPE doc [\n"
4985 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4986 "]>\n"
4987 "<doc>&en;</doc>";
4988 ExtTest2 test_data
4989 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4990 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4991 * 0x4c = L and 0x20 is a space
4992 */
4993 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4994 #ifdef XML_UNICODE
4995 const XML_Char *expected = XCS("\x00ff\x00feL ");
4996 #else
4997 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4998 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4999 #endif
5000 CharData storage;
5001
5002 CharData_Init(&storage);
5003 test_data.storage = &storage;
5004 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5005 XML_SetUserData(g_parser, &test_data);
5006 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5007 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5008 == XML_STATUS_ERROR)
5009 xml_failure(g_parser);
5010 CharData_CheckXMLChars(&storage, expected);
5011 }
5012 END_TEST
5013
START_TEST(test_ext_entity_latin1_utf16be_bom2)5014 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
5015 const char *text = "<!DOCTYPE doc [\n"
5016 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5017 "]>\n"
5018 "<doc>&en;</doc>";
5019 ExtTest2 test_data
5020 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
5021 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
5022 * 0x4c = L and 0x20 is a space
5023 */
5024 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
5025 #ifdef XML_UNICODE
5026 const XML_Char *expected = XCS("\x00fe\x00ff L");
5027 #else
5028 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
5029 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
5030 #endif
5031 CharData storage;
5032
5033 CharData_Init(&storage);
5034 test_data.storage = &storage;
5035 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5036 XML_SetUserData(g_parser, &test_data);
5037 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5038 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5039 == XML_STATUS_ERROR)
5040 xml_failure(g_parser);
5041 CharData_CheckXMLChars(&storage, expected);
5042 }
5043 END_TEST
5044
5045 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)5046 START_TEST(test_ext_entity_utf16_be) {
5047 const char *text = "<!DOCTYPE doc [\n"
5048 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5049 "]>\n"
5050 "<doc>&en;</doc>";
5051 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
5052 #ifdef XML_UNICODE
5053 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
5054 #else
5055 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
5056 "\xe6\x94\x80" /* U+6500 */
5057 "\xe2\xbc\x80" /* U+2F00 */
5058 "\xe3\xb8\x80"); /* U+3E00 */
5059 #endif
5060 CharData storage;
5061
5062 CharData_Init(&storage);
5063 test_data.storage = &storage;
5064 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5065 XML_SetUserData(g_parser, &test_data);
5066 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5067 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5068 == XML_STATUS_ERROR)
5069 xml_failure(g_parser);
5070 CharData_CheckXMLChars(&storage, expected);
5071 }
5072 END_TEST
5073
5074 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)5075 START_TEST(test_ext_entity_utf16_le) {
5076 const char *text = "<!DOCTYPE doc [\n"
5077 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5078 "]>\n"
5079 "<doc>&en;</doc>";
5080 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
5081 #ifdef XML_UNICODE
5082 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
5083 #else
5084 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
5085 "\xe6\x94\x80" /* U+6500 */
5086 "\xe2\xbc\x80" /* U+2F00 */
5087 "\xe3\xb8\x80"); /* U+3E00 */
5088 #endif
5089 CharData storage;
5090
5091 CharData_Init(&storage);
5092 test_data.storage = &storage;
5093 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5094 XML_SetUserData(g_parser, &test_data);
5095 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5096 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5097 == XML_STATUS_ERROR)
5098 xml_failure(g_parser);
5099 CharData_CheckXMLChars(&storage, expected);
5100 }
5101 END_TEST
5102
5103 /* Test little-endian UTF-16 given no explicit encoding.
5104 * The existing default encoding (UTF-8) is assumed to hold without a
5105 * BOM to contradict it, so the entity value will in fact provoke an
5106 * error because 0x00 is not a valid XML character. We parse the
5107 * whole buffer in one go rather than feeding it in byte by byte to
5108 * exercise different code paths in the initial scanning routines.
5109 */
START_TEST(test_ext_entity_utf16_unknown)5110 START_TEST(test_ext_entity_utf16_unknown) {
5111 const char *text = "<!DOCTYPE doc [\n"
5112 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5113 "]>\n"
5114 "<doc>&en;</doc>";
5115 ExtFaults2 test_data
5116 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
5117 XML_ERROR_INVALID_TOKEN};
5118
5119 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
5120 XML_SetUserData(g_parser, &test_data);
5121 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5122 "Invalid character should not have been accepted");
5123 }
5124 END_TEST
5125
5126 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)5127 START_TEST(test_ext_entity_utf8_non_bom) {
5128 const char *text = "<!DOCTYPE doc [\n"
5129 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5130 "]>\n"
5131 "<doc>&en;</doc>";
5132 ExtTest2 test_data
5133 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
5134 3, NULL, NULL};
5135 #ifdef XML_UNICODE
5136 const XML_Char *expected = XCS("\xfec0");
5137 #else
5138 const XML_Char *expected = XCS("\xef\xbb\x80");
5139 #endif
5140 CharData storage;
5141
5142 CharData_Init(&storage);
5143 test_data.storage = &storage;
5144 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5145 XML_SetUserData(g_parser, &test_data);
5146 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5147 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5148 == XML_STATUS_ERROR)
5149 xml_failure(g_parser);
5150 CharData_CheckXMLChars(&storage, expected);
5151 }
5152 END_TEST
5153
5154 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)5155 START_TEST(test_utf8_in_cdata_section) {
5156 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
5157 #ifdef XML_UNICODE
5158 const XML_Char *expected = XCS("one \x00e9 two");
5159 #else
5160 const XML_Char *expected = XCS("one \xc3\xa9 two");
5161 #endif
5162
5163 run_character_check(text, expected);
5164 }
5165 END_TEST
5166
5167 /* Test that little-endian UTF-16 in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)5168 START_TEST(test_utf8_in_cdata_section_2) {
5169 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
5170 #ifdef XML_UNICODE
5171 const XML_Char *expected = XCS("\x00e9]\x00e9two");
5172 #else
5173 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
5174 #endif
5175
5176 run_character_check(text, expected);
5177 }
5178 END_TEST
5179
START_TEST(test_utf8_in_start_tags)5180 START_TEST(test_utf8_in_start_tags) {
5181 struct test_case {
5182 bool goodName;
5183 bool goodNameStart;
5184 const char *tagName;
5185 };
5186
5187 // The idea with the tests below is this:
5188 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
5189 // go to isNever and are hence not a concern.
5190 //
5191 // We start with a character that is a valid name character
5192 // (or even name-start character, see XML 1.0r4 spec) and then we flip
5193 // single bits at places where (1) the result leaves the UTF-8 encoding space
5194 // and (2) we stay in the same n-byte sequence family.
5195 //
5196 // The flipped bits are highlighted in angle brackets in comments,
5197 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
5198 // the most significant bit to 1 to leave UTF-8 encoding space.
5199 struct test_case cases[] = {
5200 // 1-byte UTF-8: [0xxx xxxx]
5201 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
5202 {false, false, "\xBA"}, // [<1>011 1010]
5203 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
5204 {false, false, "\xB9"}, // [<1>011 1001]
5205
5206 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
5207 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
5208 // Arabic small waw U+06E5
5209 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
5210 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
5211 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
5212 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
5213 // combining char U+0301
5214 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
5215 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
5216 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
5217
5218 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
5219 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
5220 // Devanagari Letter A U+0905
5221 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
5222 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
5223 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
5224 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
5225 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
5226 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
5227 // combining char U+0901
5228 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
5229 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
5230 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
5231 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
5232 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
5233 };
5234 const bool atNameStart[] = {true, false};
5235
5236 size_t i = 0;
5237 char doc[1024];
5238 size_t failCount = 0;
5239
5240 // we need all the bytes to be parsed, but we don't want the errors that can
5241 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
5242 if (g_reparseDeferralEnabledDefault) {
5243 return;
5244 }
5245
5246 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
5247 size_t j = 0;
5248 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
5249 const bool expectedSuccess
5250 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
5251 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
5252 cases[i].tagName);
5253 XML_Parser parser = XML_ParserCreate(NULL);
5254
5255 const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
5256 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
5257
5258 bool success = true;
5259 if ((status == XML_STATUS_OK) != expectedSuccess) {
5260 success = false;
5261 }
5262 if ((status == XML_STATUS_ERROR)
5263 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
5264 success = false;
5265 }
5266
5267 if (! success) {
5268 fprintf(
5269 stderr,
5270 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
5271 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
5272 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
5273 failCount++;
5274 }
5275
5276 XML_ParserFree(parser);
5277 }
5278 }
5279
5280 if (failCount > 0) {
5281 fail("UTF-8 regression detected");
5282 }
5283 }
5284 END_TEST
5285
5286 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)5287 START_TEST(test_trailing_spaces_in_elements) {
5288 const char *text = "<doc >Hi</doc >";
5289 const XML_Char *expected = XCS("doc/doc");
5290 CharData storage;
5291
5292 CharData_Init(&storage);
5293 XML_SetElementHandler(g_parser, record_element_start_handler,
5294 record_element_end_handler);
5295 XML_SetUserData(g_parser, &storage);
5296 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5297 == XML_STATUS_ERROR)
5298 xml_failure(g_parser);
5299 CharData_CheckXMLChars(&storage, expected);
5300 }
5301 END_TEST
5302
START_TEST(test_utf16_attribute)5303 START_TEST(test_utf16_attribute) {
5304 const char text[] =
5305 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
5306 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5307 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5308 */
5309 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
5310 const XML_Char *expected = XCS("a");
5311 CharData storage;
5312
5313 CharData_Init(&storage);
5314 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5315 XML_SetUserData(g_parser, &storage);
5316 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5317 == XML_STATUS_ERROR)
5318 xml_failure(g_parser);
5319 CharData_CheckXMLChars(&storage, expected);
5320 }
5321 END_TEST
5322
START_TEST(test_utf16_second_attr)5323 START_TEST(test_utf16_second_attr) {
5324 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
5325 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5326 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5327 */
5328 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
5329 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
5330 const XML_Char *expected = XCS("1");
5331 CharData storage;
5332
5333 CharData_Init(&storage);
5334 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5335 XML_SetUserData(g_parser, &storage);
5336 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5337 == XML_STATUS_ERROR)
5338 xml_failure(g_parser);
5339 CharData_CheckXMLChars(&storage, expected);
5340 }
5341 END_TEST
5342
START_TEST(test_attr_after_solidus)5343 START_TEST(test_attr_after_solidus) {
5344 const char *text = "<doc attr1='a' / attr2='b'>";
5345
5346 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
5347 }
5348 END_TEST
5349
START_TEST(test_utf16_pe)5350 START_TEST(test_utf16_pe) {
5351 /* <!DOCTYPE doc [
5352 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
5353 * %{KHO KHWAI}{CHO CHAN};
5354 * ]>
5355 * <doc></doc>
5356 *
5357 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5358 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5359 */
5360 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
5361 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
5362 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
5363 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
5364 "\0%\x0e\x04\x0e\x08\0;\0\n"
5365 "\0]\0>\0\n"
5366 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
5367 #ifdef XML_UNICODE
5368 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
5369 #else
5370 const XML_Char *expected
5371 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
5372 #endif
5373 CharData storage;
5374
5375 CharData_Init(&storage);
5376 XML_SetUserData(g_parser, &storage);
5377 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
5378 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5379 == XML_STATUS_ERROR)
5380 xml_failure(g_parser);
5381 CharData_CheckXMLChars(&storage, expected);
5382 }
5383 END_TEST
5384
5385 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)5386 START_TEST(test_bad_attr_desc_keyword) {
5387 const char *text = "<!DOCTYPE doc [\n"
5388 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
5389 "]>\n"
5390 "<doc />";
5391
5392 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5393 "Bad keyword !IMPLIED not faulted");
5394 }
5395 END_TEST
5396
5397 /* Test that an invalid attribute description keyword consisting of
5398 * UTF-16 characters with their top bytes non-zero are correctly
5399 * faulted
5400 */
START_TEST(test_bad_attr_desc_keyword_utf16)5401 START_TEST(test_bad_attr_desc_keyword_utf16) {
5402 /* <!DOCTYPE d [
5403 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
5404 * ]><d/>
5405 *
5406 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5407 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5408 */
5409 const char text[]
5410 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5411 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
5412 "\0#\x0e\x04\x0e\x08\0>\0\n"
5413 "\0]\0>\0<\0d\0/\0>";
5414
5415 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5416 != XML_STATUS_ERROR)
5417 fail("Invalid UTF16 attribute keyword not faulted");
5418 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5419 xml_failure(g_parser);
5420 }
5421 END_TEST
5422
5423 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
5424 * using prefix-encoding (see above) to trigger specific code paths
5425 */
START_TEST(test_bad_doctype)5426 START_TEST(test_bad_doctype) {
5427 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
5428 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
5429
5430 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5431 expect_failure(text, XML_ERROR_SYNTAX,
5432 "Invalid bytes in DOCTYPE not faulted");
5433 }
5434 END_TEST
5435
START_TEST(test_bad_doctype_utf8)5436 START_TEST(test_bad_doctype_utf8) {
5437 const char *text = "<!DOCTYPE \xDB\x25"
5438 "doc><doc/>"; // [1101 1011] [<0>010 0101]
5439 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5440 "Invalid UTF-8 in DOCTYPE not faulted");
5441 }
5442 END_TEST
5443
START_TEST(test_bad_doctype_utf16)5444 START_TEST(test_bad_doctype_utf16) {
5445 const char text[] =
5446 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
5447 *
5448 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
5449 * (name character) but not a valid letter (name start character)
5450 */
5451 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
5452 "\x06\xf2"
5453 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
5454
5455 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5456 != XML_STATUS_ERROR)
5457 fail("Invalid bytes in DOCTYPE not faulted");
5458 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5459 xml_failure(g_parser);
5460 }
5461 END_TEST
5462
START_TEST(test_bad_doctype_plus)5463 START_TEST(test_bad_doctype_plus) {
5464 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
5465 "<1+>&foo;</1+>";
5466
5467 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5468 "'+' in document name not faulted");
5469 }
5470 END_TEST
5471
START_TEST(test_bad_doctype_star)5472 START_TEST(test_bad_doctype_star) {
5473 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
5474 "<1*>&foo;</1*>";
5475
5476 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5477 "'*' in document name not faulted");
5478 }
5479 END_TEST
5480
START_TEST(test_bad_doctype_query)5481 START_TEST(test_bad_doctype_query) {
5482 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5483 "<1?>&foo;</1?>";
5484
5485 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5486 "'?' in document name not faulted");
5487 }
5488 END_TEST
5489
START_TEST(test_unknown_encoding_bad_ignore)5490 START_TEST(test_unknown_encoding_bad_ignore) {
5491 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5492 "<!DOCTYPE doc SYSTEM 'foo'>"
5493 "<doc><e>&entity;</e></doc>";
5494 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5495 "Invalid character not faulted", XCS("prefix-conv"),
5496 XML_ERROR_INVALID_TOKEN};
5497
5498 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5499 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5500 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5501 XML_SetUserData(g_parser, &fault);
5502 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5503 "Bad IGNORE section with unknown encoding not failed");
5504 }
5505 END_TEST
5506
START_TEST(test_entity_in_utf16_be_attr)5507 START_TEST(test_entity_in_utf16_be_attr) {
5508 const char text[] =
5509 /* <e a='ä ä'></e> */
5510 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5511 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5512 #ifdef XML_UNICODE
5513 const XML_Char *expected = XCS("\x00e4 \x00e4");
5514 #else
5515 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5516 #endif
5517 CharData storage;
5518
5519 CharData_Init(&storage);
5520 XML_SetUserData(g_parser, &storage);
5521 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5522 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5523 == XML_STATUS_ERROR)
5524 xml_failure(g_parser);
5525 CharData_CheckXMLChars(&storage, expected);
5526 }
5527 END_TEST
5528
START_TEST(test_entity_in_utf16_le_attr)5529 START_TEST(test_entity_in_utf16_le_attr) {
5530 const char text[] =
5531 /* <e a='ä ä'></e> */
5532 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5533 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5534 #ifdef XML_UNICODE
5535 const XML_Char *expected = XCS("\x00e4 \x00e4");
5536 #else
5537 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5538 #endif
5539 CharData storage;
5540
5541 CharData_Init(&storage);
5542 XML_SetUserData(g_parser, &storage);
5543 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5544 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5545 == XML_STATUS_ERROR)
5546 xml_failure(g_parser);
5547 CharData_CheckXMLChars(&storage, expected);
5548 }
5549 END_TEST
5550
START_TEST(test_entity_public_utf16_be)5551 START_TEST(test_entity_public_utf16_be) {
5552 const char text[] =
5553 /* <!DOCTYPE d [ */
5554 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5555 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5556 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5557 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5558 /* %e; */
5559 "\0%\0e\0;\0\n"
5560 /* ]> */
5561 "\0]\0>\0\n"
5562 /* <d>&j;</d> */
5563 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5564 ExtTest2 test_data
5565 = {/* <!ENTITY j 'baz'> */
5566 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5567 const XML_Char *expected = XCS("baz");
5568 CharData storage;
5569
5570 CharData_Init(&storage);
5571 test_data.storage = &storage;
5572 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5573 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5574 XML_SetUserData(g_parser, &test_data);
5575 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5576 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5577 == XML_STATUS_ERROR)
5578 xml_failure(g_parser);
5579 CharData_CheckXMLChars(&storage, expected);
5580 }
5581 END_TEST
5582
START_TEST(test_entity_public_utf16_le)5583 START_TEST(test_entity_public_utf16_le) {
5584 const char text[] =
5585 /* <!DOCTYPE d [ */
5586 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5587 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5588 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5589 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5590 /* %e; */
5591 "%\0e\0;\0\n\0"
5592 /* ]> */
5593 "]\0>\0\n\0"
5594 /* <d>&j;</d> */
5595 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5596 ExtTest2 test_data
5597 = {/* <!ENTITY j 'baz'> */
5598 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5599 const XML_Char *expected = XCS("baz");
5600 CharData storage;
5601
5602 CharData_Init(&storage);
5603 test_data.storage = &storage;
5604 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5605 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5606 XML_SetUserData(g_parser, &test_data);
5607 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5608 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5609 == XML_STATUS_ERROR)
5610 xml_failure(g_parser);
5611 CharData_CheckXMLChars(&storage, expected);
5612 }
5613 END_TEST
5614
5615 /* Test that a doctype with neither an internal nor external subset is
5616 * faulted
5617 */
START_TEST(test_short_doctype)5618 START_TEST(test_short_doctype) {
5619 const char *text = "<!DOCTYPE doc></doc>";
5620 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5621 "DOCTYPE without subset not rejected");
5622 }
5623 END_TEST
5624
START_TEST(test_short_doctype_2)5625 START_TEST(test_short_doctype_2) {
5626 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5627 expect_failure(text, XML_ERROR_SYNTAX,
5628 "DOCTYPE without Public ID not rejected");
5629 }
5630 END_TEST
5631
START_TEST(test_short_doctype_3)5632 START_TEST(test_short_doctype_3) {
5633 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5634 expect_failure(text, XML_ERROR_SYNTAX,
5635 "DOCTYPE without System ID not rejected");
5636 }
5637 END_TEST
5638
START_TEST(test_long_doctype)5639 START_TEST(test_long_doctype) {
5640 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5641 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5642 }
5643 END_TEST
5644
START_TEST(test_bad_entity)5645 START_TEST(test_bad_entity) {
5646 const char *text = "<!DOCTYPE doc [\n"
5647 " <!ENTITY foo PUBLIC>\n"
5648 "]>\n"
5649 "<doc/>";
5650 expect_failure(text, XML_ERROR_SYNTAX,
5651 "ENTITY without Public ID is not rejected");
5652 }
5653 END_TEST
5654
5655 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5656 START_TEST(test_bad_entity_2) {
5657 const char *text = "<!DOCTYPE doc [\n"
5658 " <!ENTITY % foo bar>\n"
5659 "]>\n"
5660 "<doc/>";
5661 expect_failure(text, XML_ERROR_SYNTAX,
5662 "ENTITY without Public ID is not rejected");
5663 }
5664 END_TEST
5665
START_TEST(test_bad_entity_3)5666 START_TEST(test_bad_entity_3) {
5667 const char *text = "<!DOCTYPE doc [\n"
5668 " <!ENTITY % foo PUBLIC>\n"
5669 "]>\n"
5670 "<doc/>";
5671 expect_failure(text, XML_ERROR_SYNTAX,
5672 "Parameter ENTITY without Public ID is not rejected");
5673 }
5674 END_TEST
5675
START_TEST(test_bad_entity_4)5676 START_TEST(test_bad_entity_4) {
5677 const char *text = "<!DOCTYPE doc [\n"
5678 " <!ENTITY % foo SYSTEM>\n"
5679 "]>\n"
5680 "<doc/>";
5681 expect_failure(text, XML_ERROR_SYNTAX,
5682 "Parameter ENTITY without Public ID is not rejected");
5683 }
5684 END_TEST
5685
START_TEST(test_bad_notation)5686 START_TEST(test_bad_notation) {
5687 const char *text = "<!DOCTYPE doc [\n"
5688 " <!NOTATION n SYSTEM>\n"
5689 "]>\n"
5690 "<doc/>";
5691 expect_failure(text, XML_ERROR_SYNTAX,
5692 "Notation without System ID is not rejected");
5693 }
5694 END_TEST
5695
5696 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5697 START_TEST(test_default_doctype_handler) {
5698 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5699 " <!ENTITY foo 'bar'>\n"
5700 "]>\n"
5701 "<doc>&foo;</doc>";
5702 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5703 {XCS("'test.dtd'"), 10, XML_FALSE},
5704 {NULL, 0, XML_FALSE}};
5705 int i;
5706
5707 XML_SetUserData(g_parser, &test_data);
5708 XML_SetDefaultHandler(g_parser, checking_default_handler);
5709 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5710 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5711 == XML_STATUS_ERROR)
5712 xml_failure(g_parser);
5713 for (i = 0; test_data[i].expected != NULL; i++)
5714 if (! test_data[i].seen)
5715 fail("Default handler not run for public !DOCTYPE");
5716 }
5717 END_TEST
5718
START_TEST(test_empty_element_abort)5719 START_TEST(test_empty_element_abort) {
5720 const char *text = "<abort/>";
5721
5722 XML_SetStartElementHandler(g_parser, start_element_suspender);
5723 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5724 != XML_STATUS_ERROR)
5725 fail("Expected to error on abort");
5726 }
5727 END_TEST
5728
5729 /* Regression test for GH issue #612: unfinished m_declAttributeType
5730 * allocation in ->m_tempPool can corrupt following allocation.
5731 */
START_TEST(test_pool_integrity_with_unfinished_attr)5732 START_TEST(test_pool_integrity_with_unfinished_attr) {
5733 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5734 "<!DOCTYPE foo [\n"
5735 "<!ELEMENT foo ANY>\n"
5736 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5737 "%entp;\n"
5738 "]>\n"
5739 "<a></a>\n";
5740 const XML_Char *expected = XCS("COMMENT");
5741 CharData storage;
5742
5743 CharData_Init(&storage);
5744 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5745 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5746 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5747 XML_SetCommentHandler(g_parser, accumulate_comment);
5748 XML_SetUserData(g_parser, &storage);
5749 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5750 == XML_STATUS_ERROR)
5751 xml_failure(g_parser);
5752 CharData_CheckXMLChars(&storage, expected);
5753 }
5754 END_TEST
5755
5756 /* Test a possible early return location in internalEntityProcessor */
START_TEST(test_entity_ref_no_elements) {
  /* The document stops right after the entity reference: <foo> is never
   * closed and nothing follows &e1;.
   */
  const char *const text = "<!DOCTYPE foo [\n"
                           "<!ENTITY e1 \"test\">\n"
                           "]> <foo>&e1;"; // intentionally missing newline

  /* Uses a private parser instead of g_parser; it is released below. */
  XML_Parser parser = XML_ParserCreate(NULL);
  /* The final parse call has to fail ... */
  assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
              == XML_STATUS_ERROR);
  /* ... and the reported reason has to be XML_ERROR_NO_ELEMENTS. */
  assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
  XML_ParserFree(parser);
}
END_TEST
5769
5770 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5771 START_TEST(test_deep_nested_entity) {
5772 const size_t N_LINES = 60000;
5773 const size_t SIZE_PER_LINE = 50;
5774
5775 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5776 if (text == NULL) {
5777 fail("malloc failed");
5778 }
5779
5780 char *textPtr = text;
5781
5782 // Create the XML
5783 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5784 "<!DOCTYPE foo [\n"
5785 " <!ENTITY s0 'deepText'>\n");
5786
5787 for (size_t i = 1; i < N_LINES; ++i) {
5788 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5789 (long unsigned)i, (long unsigned)(i - 1));
5790 }
5791
5792 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5793 (long unsigned)(N_LINES - 1));
5794
5795 const XML_Char *const expected = XCS("deepText");
5796
5797 CharData storage;
5798 CharData_Init(&storage);
5799
5800 XML_Parser parser = XML_ParserCreate(NULL);
5801
5802 XML_SetCharacterDataHandler(parser, accumulate_characters);
5803 XML_SetUserData(parser, &storage);
5804
5805 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5806 == XML_STATUS_ERROR)
5807 xml_failure(parser);
5808
5809 CharData_CheckXMLChars(&storage, expected);
5810 XML_ParserFree(parser);
5811 free(text);
5812 }
5813 END_TEST
5814
5815 /* Tests if chained entity references in attributes
5816 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5817 START_TEST(test_deep_nested_attribute_entity) {
5818 const size_t N_LINES = 60000;
5819 const size_t SIZE_PER_LINE = 100;
5820
5821 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5822 if (text == NULL) {
5823 fail("malloc failed");
5824 }
5825
5826 char *textPtr = text;
5827
5828 // Create the XML
5829 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5830 "<!DOCTYPE foo [\n"
5831 " <!ENTITY s0 'deepText'>\n");
5832
5833 for (size_t i = 1; i < N_LINES; ++i) {
5834 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5835 (long unsigned)i, (long unsigned)(i - 1));
5836 }
5837
5838 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5839 (long unsigned)(N_LINES - 1));
5840
5841 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5842 ElementInfo info[]
5843 = {{XCS("foo"), 1, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
5844
5845 XML_Parser parser = XML_ParserCreate(NULL);
5846 ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5847
5848 XML_SetStartElementHandler(parser, counting_start_element_handler);
5849 XML_SetUserData(parser, &parserPlusElemenInfo);
5850
5851 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5852 == XML_STATUS_ERROR)
5853 xml_failure(parser);
5854
5855 XML_ParserFree(parser);
5856 free(text);
5857 }
5858 END_TEST
5859
START_TEST(test_deep_nested_entity_delayed_interpretation)5860 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5861 const size_t N_LINES = 70000;
5862 const size_t SIZE_PER_LINE = 100;
5863
5864 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5865 if (text == NULL) {
5866 fail("malloc failed");
5867 }
5868
5869 char *textPtr = text;
5870
5871 // Create the XML
5872 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5873 "<!DOCTYPE foo [\n"
5874 " <!ENTITY %% s0 'deepText'>\n");
5875
5876 for (size_t i = 1; i < N_LINES; ++i) {
5877 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5878 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i,
5879 (long unsigned)(i - 1));
5880 }
5881
5882 snprintf(textPtr, SIZE_PER_LINE,
5883 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n"
5884 " %%define_g;\n"
5885 "]>\n"
5886 "<foo/>\n",
5887 (long unsigned)(N_LINES - 1));
5888
5889 XML_Parser parser = XML_ParserCreate(NULL);
5890
5891 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5892 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5893 == XML_STATUS_ERROR)
5894 xml_failure(parser);
5895
5896 XML_ParserFree(parser);
5897 free(text);
5898 }
5899 END_TEST
5900
START_TEST(test_nested_entity_suspend)5901 START_TEST(test_nested_entity_suspend) {
5902 const char *const text = "<!DOCTYPE a [\n"
5903 " <!ENTITY e1 '<!--e1-->'>\n"
5904 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5905 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5906 "]>\n"
5907 "<a><!--start-->&e3;<!--end--></a>";
5908 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5909 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5910 CharData storage;
5911 CharData_Init(&storage);
5912 XML_Parser parser = XML_ParserCreate(NULL);
5913 ParserPlusStorage parserPlusStorage = {parser, &storage};
5914
5915 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5916 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5917 XML_SetUserData(parser, &parserPlusStorage);
5918
5919 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5920 while (status == XML_STATUS_SUSPENDED) {
5921 status = XML_ResumeParser(parser);
5922 }
5923 if (status != XML_STATUS_OK)
5924 xml_failure(parser);
5925
5926 CharData_CheckXMLChars(&storage, expected);
5927 XML_ParserFree(parser);
5928 }
5929 END_TEST
5930
START_TEST(test_nested_entity_suspend_2)5931 START_TEST(test_nested_entity_suspend_2) {
5932 const char *const text = "<!DOCTYPE doc [\n"
5933 " <!ENTITY ge1 'head1Ztail1'>\n"
5934 " <!ENTITY ge2 'head2&ge1;tail2'>\n"
5935 " <!ENTITY ge3 'head3&ge2;tail3'>\n"
5936 "]>\n"
5937 "<doc>&ge3;</doc>";
5938 const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5939 XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5940 CharData storage;
5941 CharData_Init(&storage);
5942 XML_Parser parser = XML_ParserCreate(NULL);
5943 ParserPlusStorage parserPlusStorage = {parser, &storage};
5944
5945 XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5946 XML_SetUserData(parser, &parserPlusStorage);
5947
5948 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5949 while (status == XML_STATUS_SUSPENDED) {
5950 status = XML_ResumeParser(parser);
5951 }
5952 if (status != XML_STATUS_OK)
5953 xml_failure(parser);
5954
5955 CharData_CheckXMLChars(&storage, expected);
5956 XML_ParserFree(parser);
5957 }
5958 END_TEST
5959
5960 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5961 START_TEST(test_big_tokens_scale_linearly) {
5962 const struct {
5963 const char *pre;
5964 const char *post;
5965 } text[] = {
5966 {"<a>", "</a>"}, // assumed good, used as baseline
5967 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5968 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
5969 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
5970 {"<e><", "/></e>"}, // big elem name, used to be O(N²)
5971 };
5972 const int num_cases = sizeof(text) / sizeof(text[0]);
5973 char aaaaaa[4096];
5974 const int fillsize = (int)sizeof(aaaaaa);
5975 const int fillcount = 100;
5976 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5977 const unsigned max_factor = 4;
5978 const unsigned max_scanned = max_factor * approx_bytes;
5979
5980 memset(aaaaaa, 'a', fillsize);
5981
5982 if (! g_reparseDeferralEnabledDefault) {
5983 return; // heuristic is disabled; we would get O(n^2) and fail.
5984 }
5985
5986 for (int i = 0; i < num_cases; ++i) {
5987 XML_Parser parser = XML_ParserCreate(NULL);
5988 assert_true(parser != NULL);
5989 enum XML_Status status;
5990 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5991
5992 // parse the start text
5993 g_bytesScanned = 0;
5994 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5995 (int)strlen(text[i].pre), XML_FALSE);
5996 if (status != XML_STATUS_OK) {
5997 xml_failure(parser);
5998 }
5999
6000 // parse lots of 'a', failing the test early if it takes too long
6001 unsigned past_max_count = 0;
6002 for (int f = 0; f < fillcount; ++f) {
6003 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
6004 if (status != XML_STATUS_OK) {
6005 xml_failure(parser);
6006 }
6007 if (g_bytesScanned > max_scanned) {
6008 // We're not done, and have already passed the limit -- the test will
6009 // definitely fail. This block allows us to save time by failing early.
6010 const unsigned pushed
6011 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
6012 fprintf(
6013 stderr,
6014 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
6015 f + 1, fillcount, pushed, g_bytesScanned,
6016 g_bytesScanned / (double)pushed, max_scanned, max_factor);
6017 past_max_count++;
6018 // We are failing, but allow a few log prints first. If we don't reach
6019 // a count of five, the test will fail after the loop instead.
6020 assert_true(past_max_count < 5);
6021 }
6022 }
6023
6024 // parse the end text
6025 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
6026 (int)strlen(text[i].post), XML_TRUE);
6027 if (status != XML_STATUS_OK) {
6028 xml_failure(parser);
6029 }
6030
6031 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
6032 if (g_bytesScanned > max_scanned) {
6033 fprintf(
6034 stderr,
6035 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
6036 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
6037 max_factor);
6038 fail("scanned too many bytes");
6039 }
6040
6041 XML_ParserFree(parser);
6042 }
6043 }
6044 END_TEST
6045
START_TEST(test_set_reparse_deferral)6046 START_TEST(test_set_reparse_deferral) {
6047 const char *const pre = "<d>";
6048 const char *const start = "<x attr='";
6049 const char *const end = "'></x>";
6050 char eeeeee[100];
6051 const int fillsize = (int)sizeof(eeeeee);
6052 memset(eeeeee, 'e', fillsize);
6053
6054 for (int enabled = 0; enabled <= 1; enabled += 1) {
6055 set_subtest("deferral=%d", enabled);
6056
6057 XML_Parser parser = XML_ParserCreate(NULL);
6058 assert_true(parser != NULL);
6059 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
6060 // pre-grow the buffer to avoid reparsing due to almost-fullness
6061 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
6062
6063 CharData storage;
6064 CharData_Init(&storage);
6065 XML_SetUserData(parser, &storage);
6066 XML_SetStartElementHandler(parser, start_element_event_handler);
6067
6068 enum XML_Status status;
6069 // parse the start text
6070 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6071 if (status != XML_STATUS_OK) {
6072 xml_failure(parser);
6073 }
6074 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6075
6076 // ..and the start of the token
6077 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
6078 if (status != XML_STATUS_OK) {
6079 xml_failure(parser);
6080 }
6081 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
6082
6083 // try to parse lots of 'e', but the token isn't finished
6084 for (int c = 0; c < 100; ++c) {
6085 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6086 if (status != XML_STATUS_OK) {
6087 xml_failure(parser);
6088 }
6089 }
6090 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6091
6092 // end the <x> token.
6093 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6094 if (status != XML_STATUS_OK) {
6095 xml_failure(parser);
6096 }
6097
6098 if (enabled) {
6099 // In general, we may need to push more data to trigger a reparse attempt,
6100 // but in this test, the data is constructed to always require it.
6101 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
6102 // 2x the token length should suffice; the +1 covers the start and end.
6103 for (int c = 0; c < 101; ++c) {
6104 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6105 if (status != XML_STATUS_OK) {
6106 xml_failure(parser);
6107 }
6108 }
6109 }
6110 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
6111
6112 XML_ParserFree(parser);
6113 }
6114 }
6115 END_TEST
6116
/* User data shared with element_decl_counter(). */
struct element_decl_data {
  XML_Parser parser; /* parser passed to XML_FreeContentModel() per model */
  int count;         /* incremented once per element declaration callback */
};
6121
6122 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)6123 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
6124 UNUSED_P(name);
6125 struct element_decl_data *testdata = (struct element_decl_data *)userData;
6126 testdata->count += 1;
6127 XML_FreeContentModel(testdata->parser, model);
6128 }
6129
6130 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)6131 external_inherited_parser(XML_Parser p, const XML_Char *context,
6132 const XML_Char *base, const XML_Char *systemId,
6133 const XML_Char *publicId) {
6134 UNUSED_P(base);
6135 UNUSED_P(systemId);
6136 UNUSED_P(publicId);
6137 const char *const pre = "<!ELEMENT document ANY>\n";
6138 const char *const start = "<!ELEMENT ";
6139 const char *const end = " ANY>\n";
6140 const char *const post = "<!ELEMENT xyz ANY>\n";
6141 const int enabled = *(int *)XML_GetUserData(p);
6142 char eeeeee[100];
6143 char spaces[100];
6144 const int fillsize = (int)sizeof(eeeeee);
6145 assert_true(fillsize == (int)sizeof(spaces));
6146 memset(eeeeee, 'e', fillsize);
6147 memset(spaces, ' ', fillsize);
6148
6149 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
6150 assert_true(parser != NULL);
6151 // pre-grow the buffer to avoid reparsing due to almost-fullness
6152 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
6153
6154 struct element_decl_data testdata;
6155 testdata.parser = parser;
6156 testdata.count = 0;
6157 XML_SetUserData(parser, &testdata);
6158 XML_SetElementDeclHandler(parser, element_decl_counter);
6159
6160 enum XML_Status status;
6161 // parse the initial text
6162 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6163 if (status != XML_STATUS_OK) {
6164 xml_failure(parser);
6165 }
6166 assert_true(testdata.count == 1); // first element should be done
6167
6168 // ..and the start of the big token
6169 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
6170 if (status != XML_STATUS_OK) {
6171 xml_failure(parser);
6172 }
6173 assert_true(testdata.count == 1); // still just the first one
6174
6175 // try to parse lots of 'e', but the token isn't finished
6176 for (int c = 0; c < 100; ++c) {
6177 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6178 if (status != XML_STATUS_OK) {
6179 xml_failure(parser);
6180 }
6181 }
6182 assert_true(testdata.count == 1); // *still* just the first one
6183
6184 // end the big token.
6185 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6186 if (status != XML_STATUS_OK) {
6187 xml_failure(parser);
6188 }
6189
6190 if (enabled) {
6191 // In general, we may need to push more data to trigger a reparse attempt,
6192 // but in this test, the data is constructed to always require it.
6193 assert_true(testdata.count == 1); // or the test is incorrect
6194 // 2x the token length should suffice; the +1 covers the start and end.
6195 for (int c = 0; c < 101; ++c) {
6196 status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
6197 if (status != XML_STATUS_OK) {
6198 xml_failure(parser);
6199 }
6200 }
6201 }
6202 assert_true(testdata.count == 2); // the big token should be done
6203
6204 // parse the final text
6205 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
6206 if (status != XML_STATUS_OK) {
6207 xml_failure(parser);
6208 }
6209 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
6210
6211 XML_ParserFree(parser);
6212 return XML_STATUS_OK;
6213 }
6214
START_TEST(test_reparse_deferral_is_inherited)6215 START_TEST(test_reparse_deferral_is_inherited) {
6216 const char *const text
6217 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
6218 for (int enabled = 0; enabled <= 1; ++enabled) {
6219 set_subtest("deferral=%d", enabled);
6220
6221 XML_Parser parser = XML_ParserCreate(NULL);
6222 assert_true(parser != NULL);
6223 XML_SetUserData(parser, (void *)&enabled);
6224 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6225 // this handler creates a sub-parser and checks that its deferral behavior
6226 // is what we expected, based on the value of `enabled` (in userdata).
6227 XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
6228 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
6229 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
6230 xml_failure(parser);
6231
6232 XML_ParserFree(parser);
6233 }
6234 }
6235 END_TEST
6236
/* XML_SetReparseDeferralEnabled() called with a NULL parser must return
 * XML_FALSE whatever flag value accompanies it.
 */
START_TEST(test_set_reparse_deferral_on_null_parser) {
  /* the two regular flag values */
  assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
  /* other small values */
  assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
  /* the int extremes, converted to XML_Bool */
  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
              == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
              == XML_FALSE);
}
END_TEST
6248
START_TEST(test_set_reparse_deferral_on_the_fly)6249 START_TEST(test_set_reparse_deferral_on_the_fly) {
6250 const char *const pre = "<d><x attr='";
6251 const char *const end = "'></x>";
6252 char iiiiii[100];
6253 const int fillsize = (int)sizeof(iiiiii);
6254 memset(iiiiii, 'i', fillsize);
6255
6256 XML_Parser parser = XML_ParserCreate(NULL);
6257 assert_true(parser != NULL);
6258 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
6259
6260 CharData storage;
6261 CharData_Init(&storage);
6262 XML_SetUserData(parser, &storage);
6263 XML_SetStartElementHandler(parser, start_element_event_handler);
6264
6265 enum XML_Status status;
6266 // parse the start text
6267 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6268 if (status != XML_STATUS_OK) {
6269 xml_failure(parser);
6270 }
6271 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6272
6273 // try to parse some 'i', but the token isn't finished
6274 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
6275 if (status != XML_STATUS_OK) {
6276 xml_failure(parser);
6277 }
6278 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6279
6280 // end the <x> token.
6281 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6282 if (status != XML_STATUS_OK) {
6283 xml_failure(parser);
6284 }
6285 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
6286
6287 // now change the heuristic setting and add *no* data
6288 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
6289 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
6290 status = XML_Parse(parser, "", 0, XML_FALSE);
6291 if (status != XML_STATUS_OK) {
6292 xml_failure(parser);
6293 }
6294 CharData_CheckXMLChars(&storage, XCS("dx"));
6295
6296 XML_ParserFree(parser);
6297 }
6298 END_TEST
6299
/* XML_SetReparseDeferralEnabled() on a real parser must return XML_FALSE
 * for flag values other than 0 and 1, and XML_TRUE for 0 and 1.
 */
START_TEST(test_set_bad_reparse_option) {
  XML_Parser parser = XML_ParserCreate(NULL);
  /* values that must be refused */
  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
  /* the only two values that must be accepted */
  assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
  assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
  XML_ParserFree(parser);
}
END_TEST
6314
/* Bookkeeping for the counting allocator below: the sum of all requested
 * sizes and the largest single request seen.
 */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in both counters
 * before forwarding the call unchanged.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() stand-in: a counted realloc() of a NULL pointer. */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
6331
/* For many combinations of leading bytes, big-token size and fill size,
 * pushes a document holding one large "<b   ...   >" token through a parser
 * that allocates via the counting allocator, and checks (a) that a buffer
 * big enough for the token was allocated and (b) that, once the whole
 * token has been pushed, it gets parsed without another large allocation.
 */
START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
  if (g_chunkSize != 0) {
    // this test does not use SINGLE_BYTES, because it depends on very precise
    // buffer fills.
    return;
  }
  if (! g_reparseDeferralEnabledDefault) {
    return; // this test is irrelevant when the deferral heuristic is disabled.
  }

  const int document_length = 65536;
  char *const document = malloc(document_length);
  assert_true(document != NULL);

  // Route the parser's allocations through the counting wrappers so that
  // g_totalAlloc and g_biggestAlloc reflect what the parser requested.
  const XML_Memory_Handling_Suite memfuncs = {
      counting_malloc,
      counting_realloc,
      free,
  };

  // Each list is terminated by a negative value.
  const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
  const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
  const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};

  for (const int *leading = leading_list; *leading >= 0; leading++) {
    for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
      for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
        set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
                    *fillsize);
        // start by checking that the test looks reasonably valid
        assert_true(*leading + *bigtoken <= document_length);

        // put 'x' everywhere; some will be overwritten by elements.
        memset(document, 'x', document_length);
        // maybe add an initial tag
        if (*leading) {
          assert_true(*leading >= 3); // or the test case is invalid
          memcpy(document, "<a>", 3);
        }
        // add the large token
        document[*leading + 0] = '<';
        document[*leading + 1] = 'b';
        memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
        document[*leading + *bigtoken - 1] = '>';

        // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
        const int expected_elem_total = 1 + (*leading ? 1 : 0);

        XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
        assert_true(parser != NULL);

        CharData storage;
        CharData_Init(&storage);
        XML_SetUserData(parser, &storage);
        XML_SetStartElementHandler(parser, start_element_event_handler);

        // Reset the counters after parser creation so that only allocations
        // made while parsing are measured.
        g_biggestAlloc = 0;
        g_totalAlloc = 0;
        int offset = 0;
        // fill data until the big token is covered (but not necessarily parsed)
        while (offset < *leading + *bigtoken) {
          assert_true(offset + *fillsize <= document_length);
          const enum XML_Status status
              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
          if (status != XML_STATUS_OK) {
            xml_failure(parser);
          }
          offset += *fillsize;
        }
        // Now, check that we've had a buffer allocation that could fit the
        // context bytes and our big token. In order to detect a special case,
        // we need to know how many bytes of our big token were included in the
        // first push that contained _any_ bytes of the big token:
        const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
        if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
          // Special case: we aren't saving any context, and the whole big token
          // was covered by a single fill, so Expat may have parsed directly
          // from our input pointer, without allocating an internal buffer.
        } else if (*leading < XML_CONTEXT_BYTES) {
          // Fewer leading bytes than the context window: the buffer must hold
          // all of them plus the token.
          assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
        } else {
          // Otherwise a full context window plus the token must fit.
          assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
        }
        // fill data until the big token is actually parsed
        while (storage.count < expected_elem_total) {
          const size_t alloc_before = g_totalAlloc;
          assert_true(offset + *fillsize <= document_length);
          const enum XML_Status status
              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
          if (status != XML_STATUS_OK) {
            xml_failure(parser);
          }
          offset += *fillsize;
          // since all the bytes of the big token are already in the buffer,
          // the bufsize ceiling should make us finish its parsing without any
          // further buffer allocations. We assume that there will be no other
          // large allocations in this test.
          assert_true(g_totalAlloc - alloc_before < 4096);
        }
        // test-the-test: was our alloc even called?
        assert_true(g_totalAlloc > 0);
        // test-the-test: there shouldn't be any extra start elements
        assert_true(storage.count == expected_elem_total);

        XML_ParserFree(parser);
      }
    }
  }
  free(document);
}
END_TEST
6443
START_TEST(test_varying_buffer_fills)6444 START_TEST(test_varying_buffer_fills) {
6445 const int KiB = 1024;
6446 const int MiB = 1024 * KiB;
6447 const int document_length = 16 * MiB;
6448 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
6449
6450 if (g_chunkSize != 0) {
6451 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
6452 }
6453
6454 char *const document = malloc(document_length);
6455 assert_true(document != NULL);
6456 memset(document, 'x', document_length);
6457 document[0] = '<';
6458 document[1] = 't';
6459 memset(&document[2], ' ', big - 2); // a very spacy token
6460 document[big - 1] = '>';
6461
6462 // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
6463 // When reparse deferral is enabled, the final (negated) value is the expected
6464 // maximum number of bytes scanned in parse attempts.
6465 const int testcases[][30] = {
6466 {8 * MiB, -8 * MiB},
6467 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
6468 // zero-size fills shouldn't trigger the bypass
6469 {4 * MiB, 0, 4 * MiB, -12 * MiB},
6470 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
6471 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
6472 // try to hit the buffer ceiling only once (at the end)
6473 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
6474 // try to hit the same buffer ceiling multiple times
6475 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
6476
6477 // try to hit every ceiling, by always landing 1K shy of the buffer size
6478 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
6479 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
6480
6481 // try to avoid every ceiling, by always landing 1B past the buffer size
6482 // the normal 2x heuristic threshold still forces parse attempts.
6483 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6484 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6485 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6486 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6487 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6488 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6489 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6490 -(10 * MiB + 682 * KiB + 7)},
6491 // try to avoid every ceiling again, except on our last fill.
6492 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6493 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6494 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6495 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6496 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6497 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6498 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6499 -(10 * MiB + 682 * KiB + 6)},
6500
6501 // try to hit ceilings on the way multiple times
6502 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6503 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6504 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
6505 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
6506 // we'll make a parse attempt at every parse call
6507 -(45 * MiB + 12)},
6508 };
6509 const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6510 for (int test_i = 0; test_i < testcount; test_i++) {
6511 const int *fillsize = testcases[test_i];
6512 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6513 fillsize[2], fillsize[3]);
6514 XML_Parser parser = XML_ParserCreate(NULL);
6515 assert_true(parser != NULL);
6516
6517 CharData storage;
6518 CharData_Init(&storage);
6519 XML_SetUserData(parser, &storage);
6520 XML_SetStartElementHandler(parser, start_element_event_handler);
6521
6522 g_bytesScanned = 0;
6523 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6524 int offset = 0;
6525 while (*fillsize >= 0) {
6526 assert_true(offset + *fillsize <= document_length); // or test is invalid
6527 const enum XML_Status status
6528 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6529 if (status != XML_STATUS_OK) {
6530 xml_failure(parser);
6531 }
6532 offset += *fillsize;
6533 fillsize++;
6534 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6535 worstcase_bytes += offset; // we might've tried to parse all pending bytes
6536 }
6537 assert_true(storage.count == 1); // the big token should've been parsed
6538 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6539 if (g_reparseDeferralEnabledDefault) {
6540 // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6541 const unsigned max_bytes_scanned = -*fillsize;
6542 if (g_bytesScanned > max_bytes_scanned) {
6543 fprintf(stderr,
6544 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6545 g_bytesScanned, max_bytes_scanned);
6546 fail("too many bytes scanned in parse attempts");
6547 }
6548 }
6549 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6550
6551 XML_ParserFree(parser);
6552 }
6553 free(document);
6554 }
6555 END_TEST
6556
START_TEST(test_empty_ext_param_entity_in_value)6557 START_TEST(test_empty_ext_param_entity_in_value) {
6558 const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>";
6559 ExtOption options[] = {
6560 {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">"
6561 "<!ENTITY ge \"%pe;\">"},
6562 {XCS("empty"), ""},
6563 {NULL, NULL},
6564 };
6565
6566 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6567 XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner);
6568 XML_SetUserData(g_parser, options);
6569 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6570 == XML_STATUS_ERROR)
6571 xml_failure(g_parser);
6572 }
6573 END_TEST
6574
/* Create the "basic tests" test case, add it to suite `s`, install
   basic_setup/basic_teardown as its checked fixture, and register the tests
   listed below with it.

   Most tests are registered unconditionally through tcase_add_test().  The
   tcase_add_test__ifdef_xml_dtd() and tcase_add_test__if_xml_ge() variants
   are defined elsewhere; judging by their names they presumably register the
   test only for builds with XML_DTD defined / XML_GE enabled -- confirm
   against their definitions.  Tests are registered in the order written
   here, so keep the order when adding or moving entries. */
void
make_basic_test_case(Suite *s) {
  TCase *tc_basic = tcase_create("basic tests");

  suite_add_tcase(s, tc_basic);
  tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);

  tcase_add_test(tc_basic, test_nul_byte);
  tcase_add_test(tc_basic, test_u0000_char);
  tcase_add_test(tc_basic, test_siphash_self);
  tcase_add_test(tc_basic, test_siphash_spec);
  tcase_add_test(tc_basic, test_bom_utf8);
  tcase_add_test(tc_basic, test_bom_utf16_be);
  tcase_add_test(tc_basic, test_bom_utf16_le);
  tcase_add_test(tc_basic, test_nobom_utf16_le);
  tcase_add_test(tc_basic, test_hash_collision);
  tcase_add_test(tc_basic, test_hash_salt_setter);
  tcase_add_test(tc_basic, test_illegal_utf8);
  tcase_add_test(tc_basic, test_utf8_auto_align);
  tcase_add_test(tc_basic, test_utf16);
  tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
  tcase_add_test(tc_basic, test_not_utf16);
  tcase_add_test(tc_basic, test_bad_encoding);
  tcase_add_test(tc_basic, test_latin1_umlauts);
  tcase_add_test(tc_basic, test_long_utf8_character);
  tcase_add_test(tc_basic, test_long_latin1_attribute);
  tcase_add_test(tc_basic, test_long_ascii_attribute);
  /* Regression test for SF bug #491986. */
  tcase_add_test(tc_basic, test_danish_latin1);
  /* Regression test for SF bug #514281. */
  tcase_add_test(tc_basic, test_french_charref_hexidecimal);
  tcase_add_test(tc_basic, test_french_charref_decimal);
  tcase_add_test(tc_basic, test_french_latin1);
  tcase_add_test(tc_basic, test_french_utf8);
  tcase_add_test(tc_basic, test_utf8_false_rejection);
  tcase_add_test(tc_basic, test_line_number_after_parse);
  tcase_add_test(tc_basic, test_column_number_after_parse);
  tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
  tcase_add_test(tc_basic, test_line_number_after_error);
  tcase_add_test(tc_basic, test_column_number_after_error);
  tcase_add_test(tc_basic, test_really_long_lines);
  tcase_add_test(tc_basic, test_really_long_encoded_lines);
  tcase_add_test(tc_basic, test_end_element_events);
  tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
  tcase_add_test(tc_basic, test_attr_whitespace_normalization);
  tcase_add_test(tc_basic, test_xmldecl_misplaced);
  tcase_add_test(tc_basic, test_xmldecl_invalid);
  tcase_add_test(tc_basic, test_xmldecl_missing_attr);
  tcase_add_test(tc_basic, test_xmldecl_missing_value);
  tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
  tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
  tcase_add_test(tc_basic,
                 test_wfc_undeclared_entity_with_external_subset_standalone);
  tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
  tcase_add_test(tc_basic, test_not_standalone_handler_reject);
  tcase_add_test(tc_basic, test_not_standalone_handler_accept);
  tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
  tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
  tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
  tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
  tcase_add_test(tc_basic, test_dtd_attr_handling);
  tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
  tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
  tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_good_cdata_ascii);
  tcase_add_test(tc_basic, test_good_cdata_utf16);
  tcase_add_test(tc_basic, test_good_cdata_utf16_le);
  tcase_add_test(tc_basic, test_long_cdata_utf16);
  tcase_add_test(tc_basic, test_multichar_cdata_utf16);
  tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
  tcase_add_test(tc_basic, test_bad_cdata);
  tcase_add_test(tc_basic, test_bad_cdata_utf16);
  tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_memory_allocation);
  tcase_add_test__if_xml_ge(tc_basic, test_default_current);
  tcase_add_test(tc_basic, test_dtd_elements);
  tcase_add_test(tc_basic, test_dtd_elements_nesting);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_foreign_dtd_without_external_subset);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
  tcase_add_test(tc_basic, test_set_base);
  tcase_add_test(tc_basic, test_attributes);
  tcase_add_test(tc_basic, test_duplicate_cdata_attribute);
  tcase_add_test(tc_basic, test_duplicate_id_attribute_1);
  tcase_add_test(tc_basic, test_duplicate_id_attribute_2);
  tcase_add_test(tc_basic, test_duplicate_cdata_attribute_multiple_attlistdecl);
  tcase_add_test(tc_basic,
                 test_duplicate_cdata_attribute_multiple_attlistdecl_2);
  tcase_add_test(tc_basic,
                 test_duplicate_cdata_attribute_multiple_attlistdecl_3);
  tcase_add_test(tc_basic, test_duplicate_id_attribute_multiple_attlistdecl);
  tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
  tcase_add_test(tc_basic, test_resume_invalid_parse);
  tcase_add_test(tc_basic, test_resume_resuspended);
  tcase_add_test(tc_basic, test_cdata_default);
  tcase_add_test(tc_basic, test_subordinate_reset);
  tcase_add_test(tc_basic, test_subordinate_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_ext_entity_invalid_suspended_parse);
  tcase_add_test(tc_basic, test_explicit_encoding);
  tcase_add_test(tc_basic, test_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
  tcase_add_test(tc_basic, test_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
  tcase_add_test(tc_basic, test_empty_parse);
  tcase_add_test(tc_basic, test_negative_len_parse);
  tcase_add_test(tc_basic, test_negative_len_parse_buffer);
  tcase_add_test(tc_basic, test_get_buffer_1);
  tcase_add_test(tc_basic, test_get_buffer_2);
  /* This test is only registered when XML_CONTEXT_BYTES is positive. */
#if XML_CONTEXT_BYTES > 0
  tcase_add_test(tc_basic, test_get_buffer_3_overflow);
#endif
  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
  tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
  tcase_add_test(tc_basic, test_byte_info_at_end);
  tcase_add_test(tc_basic, test_byte_info_at_error);
  tcase_add_test(tc_basic, test_byte_info_at_cdata);
  tcase_add_test(tc_basic, test_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
  tcase_add_test(tc_basic, test_not_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
  tcase_add_test(tc_basic, test_bad_public_doctype);
  tcase_add_test(tc_basic, test_attribute_enum_value);
  tcase_add_test(tc_basic, test_predefined_entity_redefinition);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
  tcase_add_test(tc_basic, test_public_notation_no_sysid);
  tcase_add_test(tc_basic, test_nested_groups);
  tcase_add_test(tc_basic, test_group_choice);
  tcase_add_test(tc_basic, test_standalone_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity_2);
  tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
  tcase_add_test(tc_basic, test_suspend_xdecl);
  tcase_add_test(tc_basic, test_abort_epilog);
  tcase_add_test(tc_basic, test_abort_epilog_2);
  tcase_add_test(tc_basic, test_suspend_epilog);
  tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
  tcase_add_test(tc_basic, test_unfinished_epilog);
  tcase_add_test(tc_basic, test_partial_char_in_epilog);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_suspend_resume_internal_entity_issue_629);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
  tcase_add_test(tc_basic, test_restart_on_error);
  tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
  tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
  tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
  tcase_add_test(tc_basic, test_standalone_internal_entity);
  tcase_add_test(tc_basic, test_skipped_external_entity);
  tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
  tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
  tcase_add_test(tc_basic, test_pi_handled_in_default);
  tcase_add_test(tc_basic, test_comment_handled_in_default);
  tcase_add_test(tc_basic, test_pi_yml);
  tcase_add_test(tc_basic, test_pi_xnl);
  tcase_add_test(tc_basic, test_pi_xmm);
  tcase_add_test(tc_basic, test_utf16_pi);
  tcase_add_test(tc_basic, test_utf16_be_pi);
  tcase_add_test(tc_basic, test_utf16_be_comment);
  tcase_add_test(tc_basic, test_utf16_le_comment);
  tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_unknown_encoding_success);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
  tcase_add_test(tc_basic, test_invalid_unknown_encoding);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
  tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary);
  tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
  tcase_add_test(tc_basic, test_utf8_in_start_tags);
  tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
  tcase_add_test(tc_basic, test_utf16_attribute);
  tcase_add_test(tc_basic, test_utf16_second_attr);
  tcase_add_test(tc_basic, test_attr_after_solidus);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
  tcase_add_test(tc_basic, test_bad_doctype);
  tcase_add_test(tc_basic, test_bad_doctype_utf8);
  tcase_add_test(tc_basic, test_bad_doctype_utf16);
  tcase_add_test(tc_basic, test_bad_doctype_plus);
  tcase_add_test(tc_basic, test_bad_doctype_star);
  tcase_add_test(tc_basic, test_bad_doctype_query);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
  tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
  tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
  tcase_add_test(tc_basic, test_short_doctype);
  tcase_add_test(tc_basic, test_short_doctype_2);
  tcase_add_test(tc_basic, test_short_doctype_3);
  tcase_add_test(tc_basic, test_long_doctype);
  tcase_add_test(tc_basic, test_bad_entity);
  tcase_add_test(tc_basic, test_bad_entity_2);
  tcase_add_test(tc_basic, test_bad_entity_3);
  tcase_add_test(tc_basic, test_bad_entity_4);
  tcase_add_test(tc_basic, test_bad_notation);
  tcase_add_test(tc_basic, test_default_doctype_handler);
  tcase_add_test(tc_basic, test_empty_element_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_pool_integrity_with_unfinished_attr);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value);
  tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
  tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
  tcase_add_test__if_xml_ge(tc_basic,
                            test_deep_nested_entity_delayed_interpretation);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
  tcase_add_test(tc_basic, test_set_reparse_deferral);
  tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
  tcase_add_test(tc_basic, test_set_bad_reparse_option);
  tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
  tcase_add_test(tc_basic, test_varying_buffer_fills);
}
6849