1 /* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Copyright (c) 2024-2026 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23 Copyright (c) 2026 Francesco Bertolaccini
24 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez@gmail.com>
25 Licensed under the MIT license:
26
27 Permission is hereby granted, free of charge, to any person obtaining
28 a copy of this software and associated documentation files (the
29 "Software"), to deal in the Software without restriction, including
30 without limitation the rights to use, copy, modify, merge, publish,
31 distribute, sublicense, and/or sell copies of the Software, and to permit
32 persons to whom the Software is furnished to do so, subject to the
33 following conditions:
34
35 The above copyright notice and this permission notice shall be included
36 in all copies or substantial portions of the Software.
37
38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
39 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
40 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
41 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
42 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
43 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
44 USE OR OTHER DEALINGS IN THE SOFTWARE.
45 */
46
47 #if defined(NDEBUG)
48 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
49 #endif
50
51 #include "expat_config.h"
52
53 #include <assert.h>
54
55 #include <stdbool.h>
56 #include <stdio.h>
57 #include <string.h>
58 #include <time.h>
59
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69
70 static void
basic_setup(void)71 basic_setup(void) {
72 g_parser = XML_ParserCreate(NULL);
73 if (g_parser == NULL)
74 fail("Parser not created.");
75 }
76
77 /*
78 * Character & encoding tests.
79 */
80
START_TEST(test_nul_byte)81 START_TEST(test_nul_byte) {
82 char text[] = "<doc>\0</doc>";
83
84 /* test that a NUL byte (in US-ASCII data) is an error */
85 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86 == XML_STATUS_OK)
87 fail("Parser did not report error on NUL-byte.");
88 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89 xml_failure(g_parser);
90 }
91 END_TEST
92
START_TEST(test_u0000_char)93 START_TEST(test_u0000_char) {
94 /* test that a NUL byte (in US-ASCII data) is an error */
95 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF,
96 "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99
START_TEST(test_siphash_self)100 START_TEST(test_siphash_self) {
101 if (! sip24_valid())
102 fail("SipHash self-test failed");
103 }
104 END_TEST
105
START_TEST(test_siphash_spec)106 START_TEST(test_siphash_spec) {
107 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109 "\x0a\x0b\x0c\x0d\x0e";
110 const size_t len = sizeof(message) - 1;
111 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112 struct siphash state;
113 struct sipkey key;
114
115 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116 "\x0a\x0b\x0c\x0d\x0e\x0f");
117 sip24_init(&state, &key);
118
119 /* Cover spread across calls */
120 sip24_update(&state, message, 4);
121 sip24_update(&state, message + 4, len - 4);
122
123 /* Cover null length */
124 sip24_update(&state, message, 0);
125
126 if (sip24_final(&state) != expected)
127 fail("sip24_final failed spec test\n");
128
129 /* Cover wrapper */
130 if (siphash24(message, len, &key) != expected)
131 fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134
START_TEST(test_bom_utf8)135 START_TEST(test_bom_utf8) {
136 /* This test is really just making sure we don't core on a UTF-8 BOM. */
137 const char *text = "\357\273\277<e/>";
138
139 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140 == XML_STATUS_ERROR)
141 xml_failure(g_parser);
142 }
143 END_TEST
144
START_TEST(test_bom_utf16_be)145 START_TEST(test_bom_utf16_be) {
146 char text[] = "\376\377\0<\0e\0/\0>";
147
148 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149 == XML_STATUS_ERROR)
150 xml_failure(g_parser);
151 }
152 END_TEST
153
START_TEST(test_bom_utf16_le)154 START_TEST(test_bom_utf16_le) {
155 char text[] = "\377\376<\0e\0/\0>\0";
156
157 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158 == XML_STATUS_ERROR)
159 xml_failure(g_parser);
160 }
161 END_TEST
162
START_TEST(test_nobom_utf16_le)163 START_TEST(test_nobom_utf16_le) {
164 char text[] = " \0<\0e\0/\0>\0";
165
166 if (g_chunkSize == 1) {
167 // TODO: with just the first byte, we can't tell the difference between
168 // UTF-16-LE and UTF-8. Avoid the failure for now.
169 return;
170 }
171
172 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173 == XML_STATUS_ERROR)
174 xml_failure(g_parser);
175 }
176 END_TEST
177
START_TEST(test_hash_collision)178 START_TEST(test_hash_collision) {
179 /* For full coverage of the lookup routine, we need to ensure a
180 * hash collision even though we can only tell that we have one
181 * through breakpoint debugging or coverage statistics. The
182 * following will cause a hash collision on machines with a 64-bit
183 * long type; others will have to experiment. The full coverage
184 * tests invoked from qa.sh usually provide a hash collision, but
185 * not always. This is an attempt to provide insurance.
186 */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188 const char *text
189 = "<doc>\n"
190 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195 "<d8>This triggers the table growth and collides with b2</d8>\n"
196 "</doc>\n";
197
198 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200 == XML_STATUS_ERROR)
201 xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205
START_TEST(test_hash_salt_setter)206 START_TEST(test_hash_salt_setter) {
207 const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
208 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
209 XML_Parser parser = XML_ParserCreate(NULL);
210
211 // NULL parser should be rejected
212 assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE);
213
214 // NULL entropy should be rejected
215 assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE);
216
217 // Setting should be allowed more than once
218 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
219 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
220
221 // But not after parsing has started
222 assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */)
223 == XML_STATUS_OK);
224 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE);
225
226 XML_ParserFree(parser);
227 }
228 END_TEST
229
230 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)231 START_TEST(test_danish_latin1) {
232 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
233 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
234 #ifdef XML_UNICODE
235 const XML_Char *expected
236 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
237 #else
238 const XML_Char *expected
239 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
240 #endif
241 run_character_check(text, expected);
242 }
243 END_TEST
244
245 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)246 START_TEST(test_french_charref_hexidecimal) {
247 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
248 "<doc>éèàçêÈ</doc>";
249 #ifdef XML_UNICODE
250 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
251 #else
252 const XML_Char *expected
253 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
254 #endif
255 run_character_check(text, expected);
256 }
257 END_TEST
258
START_TEST(test_french_charref_decimal)259 START_TEST(test_french_charref_decimal) {
260 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
261 "<doc>éèàçêÈ</doc>";
262 #ifdef XML_UNICODE
263 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
264 #else
265 const XML_Char *expected
266 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
267 #endif
268 run_character_check(text, expected);
269 }
270 END_TEST
271
START_TEST(test_french_latin1)272 START_TEST(test_french_latin1) {
273 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
274 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
275 #ifdef XML_UNICODE
276 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
277 #else
278 const XML_Char *expected
279 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
280 #endif
281 run_character_check(text, expected);
282 }
283 END_TEST
284
START_TEST(test_french_utf8)285 START_TEST(test_french_utf8) {
286 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
287 "<doc>\xC3\xA9</doc>";
288 #ifdef XML_UNICODE
289 const XML_Char *expected = XCS("\x00e9");
290 #else
291 const XML_Char *expected = XCS("\xC3\xA9");
292 #endif
293 run_character_check(text, expected);
294 }
295 END_TEST
296
297 /* Regression test for SF bug #600479.
298 XXX There should be a test that exercises all legal XML Unicode
299 characters as PCDATA and attribute value content, and XML Name
300 characters as part of element and attribute names.
301 */
START_TEST(test_utf8_false_rejection)302 START_TEST(test_utf8_false_rejection) {
303 const char *text = "<doc>\xEF\xBA\xBF</doc>";
304 #ifdef XML_UNICODE
305 const XML_Char *expected = XCS("\xfebf");
306 #else
307 const XML_Char *expected = XCS("\xEF\xBA\xBF");
308 #endif
309 run_character_check(text, expected);
310 }
311 END_TEST
312
313 /* Regression test for SF bug #477667.
314 This test assures that any 8-bit character followed by a 7-bit
315 character will not be mistakenly interpreted as a valid UTF-8
316 sequence.
317 */
START_TEST(test_illegal_utf8)318 START_TEST(test_illegal_utf8) {
319 char text[100];
320 int i;
321
322 for (i = 128; i <= 255; ++i) {
323 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
324 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
325 == XML_STATUS_OK) {
326 snprintf(text, sizeof(text),
327 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
328 i);
329 fail(text);
330 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
331 xml_failure(g_parser);
332 /* Reset the parser since we use the same parser repeatedly. */
333 XML_ParserReset(g_parser, NULL);
334 }
335 }
336 END_TEST
337
338 /* Examples, not masks: */
339 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
340 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
341 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
342 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
343 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
344
START_TEST(test_utf8_auto_align)345 START_TEST(test_utf8_auto_align) {
346 struct TestCase {
347 ptrdiff_t expectedMovementInChars;
348 const char *input;
349 };
350
351 struct TestCase cases[] = {
352 {00, ""},
353
354 {00, UTF8_LEAD_1},
355
356 {-1, UTF8_LEAD_2},
357 {00, UTF8_LEAD_2 UTF8_FOLLOW},
358
359 {-1, UTF8_LEAD_3},
360 {-2, UTF8_LEAD_3 UTF8_FOLLOW},
361 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
362
363 {-1, UTF8_LEAD_4},
364 {-2, UTF8_LEAD_4 UTF8_FOLLOW},
365 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
366 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
367 };
368
369 size_t i = 0;
370 bool success = true;
371 for (; i < sizeof(cases) / sizeof(*cases); i++) {
372 const char *fromLim = cases[i].input + strlen(cases[i].input);
373 const char *const fromLimInitially = fromLim;
374 ptrdiff_t actualMovementInChars;
375
376 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
377
378 actualMovementInChars = (fromLim - fromLimInitially);
379 if (actualMovementInChars != cases[i].expectedMovementInChars) {
380 size_t j = 0;
381 success = false;
382 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
383 ", actually moved by %2d chars: \"",
384 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
385 (int)actualMovementInChars);
386 for (; j < strlen(cases[i].input); j++) {
387 printf("\\x%02x", (unsigned char)cases[i].input[j]);
388 }
389 printf("\"\n");
390 }
391 }
392
393 if (! success) {
394 fail("UTF-8 auto-alignment is not bullet-proof\n");
395 }
396 }
397 END_TEST
398
START_TEST(test_utf16)399 START_TEST(test_utf16) {
400 /* <?xml version="1.0" encoding="UTF-16"?>
401 * <doc a='123'>some {A} text</doc>
402 *
403 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
404 */
405 char text[]
406 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
407 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
408 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
409 "\000'\000?\000>\000\n"
410 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
411 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
412 "<\000/\000d\000o\000c\000>";
413 #ifdef XML_UNICODE
414 const XML_Char *expected = XCS("some \xff21 text");
415 #else
416 const XML_Char *expected = XCS("some \357\274\241 text");
417 #endif
418 CharData storage;
419
420 CharData_Init(&storage);
421 XML_SetUserData(g_parser, &storage);
422 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
423 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
424 == XML_STATUS_ERROR)
425 xml_failure(g_parser);
426 CharData_CheckXMLChars(&storage, expected);
427 }
428 END_TEST
429
START_TEST(test_utf16_le_epilog_newline)430 START_TEST(test_utf16_le_epilog_newline) {
431 unsigned int first_chunk_bytes = 17;
432 char text[] = "\xFF\xFE" /* BOM */
433 "<\000e\000/\000>\000" /* document element */
434 "\r\000\n\000\r\000\n\000"; /* epilog */
435
436 if (first_chunk_bytes >= sizeof(text) - 1)
437 fail("bad value of first_chunk_bytes");
438 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE)
439 == XML_STATUS_ERROR)
440 xml_failure(g_parser);
441 else {
442 enum XML_Status rc;
443 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
444 (int)(sizeof(text) - first_chunk_bytes - 1),
445 XML_TRUE);
446 if (rc == XML_STATUS_ERROR)
447 xml_failure(g_parser);
448 }
449 }
450 END_TEST
451
452 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)453 START_TEST(test_not_utf16) {
454 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
455 "<doc>Hi</doc>";
456
457 /* Use a handler to provoke the appropriate code paths */
458 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
459 expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
460 "UTF-16 declared in UTF-8 not faulted");
461 }
462 END_TEST
463
464 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)465 START_TEST(test_bad_encoding) {
466 const char *text = "<doc>Hi</doc>";
467
468 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
469 fail("XML_SetEncoding failed");
470 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
471 "Unknown encoding not faulted");
472 }
473 END_TEST
474
475 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)476 START_TEST(test_latin1_umlauts) {
477 const char *text
478 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
479 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n"
480 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>";
481 #ifdef XML_UNICODE
482 /* Expected results in UTF-16 */
483 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
484 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
485 #else
486 /* Expected results in UTF-8 */
487 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
488 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
489 #endif
490
491 run_character_check(text, expected);
492 XML_ParserReset(g_parser, NULL);
493 run_attribute_check(text, expected);
494 /* Repeat with a default handler */
495 XML_ParserReset(g_parser, NULL);
496 XML_SetDefaultHandler(g_parser, dummy_default_handler);
497 run_character_check(text, expected);
498 XML_ParserReset(g_parser, NULL);
499 XML_SetDefaultHandler(g_parser, dummy_default_handler);
500 run_attribute_check(text, expected);
501 }
502 END_TEST
503
504 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)505 START_TEST(test_long_utf8_character) {
506 const char *text
507 = "<?xml version='1.0' encoding='utf-8'?>\n"
508 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
509 "<do\xf0\x90\x80\x80/>";
510 expect_failure(text, XML_ERROR_INVALID_TOKEN,
511 "4-byte UTF-8 character in element name not faulted");
512 }
513 END_TEST
514
515 /* Test that a long latin-1 attribute (too long to convert in one go)
516 * is correctly converted
517 */
START_TEST(test_long_latin1_attribute)518 START_TEST(test_long_latin1_attribute) {
519 const char *text
520 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
521 "<doc att='"
522 /* 64 characters per line */
523 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
524 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
525 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
526 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
527 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
528 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
529 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
530 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
531 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
532 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
533 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
534 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
535 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
536 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
537 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
538 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
539 /* Last character splits across a buffer boundary */
540 "\xe4'>\n</doc>";
541
542 const XML_Char *expected =
543 /* 64 characters per line */
544 /* clang-format off */
545 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
546 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
547 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
548 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
549 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
550 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
551 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
552 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
553 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
554 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
555 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
556 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
557 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
558 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
559 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
560 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
561 /* clang-format on */
562 #ifdef XML_UNICODE
563 XCS("\x00e4");
564 #else
565 XCS("\xc3\xa4");
566 #endif
567
568 run_attribute_check(text, expected);
569 }
570 END_TEST
571
572 /* Test that a long ASCII attribute (too long to convert in one go)
573 * is correctly converted
574 */
START_TEST(test_long_ascii_attribute)575 START_TEST(test_long_ascii_attribute) {
576 const char *text
577 = "<?xml version='1.0' encoding='us-ascii'?>\n"
578 "<doc att='"
579 /* 64 characters per line */
580 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
581 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
582 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
583 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
584 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
585 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
586 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
587 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
588 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
589 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
590 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
591 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
592 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
593 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
594 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
595 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
596 "01234'>\n</doc>";
597 const XML_Char *expected =
598 /* 64 characters per line */
599 /* clang-format off */
600 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
601 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
604 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
605 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
606 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
607 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
608 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
609 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
610 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
611 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
612 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
613 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
614 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
615 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
616 XCS("01234");
617 /* clang-format on */
618
619 run_attribute_check(text, expected);
620 }
621 END_TEST
622
623 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)624 START_TEST(test_line_number_after_parse) {
625 const char *text = "<tag>\n"
626 "\n"
627 "\n</tag>";
628 XML_Size lineno;
629
630 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
631 == XML_STATUS_ERROR)
632 xml_failure(g_parser);
633 lineno = XML_GetCurrentLineNumber(g_parser);
634 if (lineno != 4) {
635 char buffer[100];
636 snprintf(buffer, sizeof(buffer),
637 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
638 fail(buffer);
639 }
640 }
641 END_TEST
642
643 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)644 START_TEST(test_column_number_after_parse) {
645 const char *text = "<tag></tag>";
646 XML_Size colno;
647
648 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
649 == XML_STATUS_ERROR)
650 xml_failure(g_parser);
651 colno = XML_GetCurrentColumnNumber(g_parser);
652 if (colno != 11) {
653 char buffer[100];
654 snprintf(buffer, sizeof(buffer),
655 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
656 fail(buffer);
657 }
658 }
659 END_TEST
660
661 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)662 START_TEST(test_line_and_column_numbers_inside_handlers) {
663 const char *text = "<a>\n" /* Unix end-of-line */
664 " <b>\r\n" /* Windows end-of-line */
665 " <c/>\r" /* Mac OS end-of-line */
666 " </b>\n"
667 " <d>\n"
668 " <f/>\n"
669 " </d>\n"
670 "</a>";
671 const StructDataEntry expected[]
672 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
673 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
674 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
675 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
676 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
677 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
678 StructData storage;
679
680 StructData_Init(&storage);
681 XML_SetUserData(g_parser, &storage);
682 XML_SetStartElementHandler(g_parser, start_element_event_handler2);
683 XML_SetEndElementHandler(g_parser, end_element_event_handler2);
684 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
685 == XML_STATUS_ERROR)
686 xml_failure(g_parser);
687
688 StructData_CheckItems(&storage, expected, expected_count);
689 StructData_Dispose(&storage);
690 }
691 END_TEST
692
693 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)694 START_TEST(test_line_number_after_error) {
695 const char *text = "<a>\n"
696 " <b>\n"
697 " </a>"; /* missing </b> */
698 XML_Size lineno;
699 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
700 != XML_STATUS_ERROR)
701 fail("Expected a parse error");
702
703 lineno = XML_GetCurrentLineNumber(g_parser);
704 if (lineno != 3) {
705 char buffer[100];
706 snprintf(buffer, sizeof(buffer),
707 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
708 fail(buffer);
709 }
710 }
711 END_TEST
712
713 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)714 START_TEST(test_column_number_after_error) {
715 const char *text = "<a>\n"
716 " <b>\n"
717 " </a>"; /* missing </b> */
718 XML_Size colno;
719 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
720 != XML_STATUS_ERROR)
721 fail("Expected a parse error");
722
723 colno = XML_GetCurrentColumnNumber(g_parser);
724 if (colno != 4) {
725 char buffer[100];
726 snprintf(buffer, sizeof(buffer),
727 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
728 fail(buffer);
729 }
730 }
731 END_TEST
732
733 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)734 START_TEST(test_really_long_lines) {
735 /* This parses an input line longer than INIT_DATA_BUF_SIZE
736 characters long (defined to be 1024 in xmlparse.c). We take a
737 really cheesy approach to building the input buffer, because
738 this avoids writing bugs in buffer-filling code.
739 */
740 const char *text
741 = "<e>"
742 /* 64 chars */
743 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
744 /* until we have at least 1024 characters on the line: */
745 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
746 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
747 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
748 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
749 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
750 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
751 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
752 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
753 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
754 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "</e>";
762 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
763 == XML_STATUS_ERROR)
764 xml_failure(g_parser);
765 }
766 END_TEST
767
768 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)769 START_TEST(test_really_long_encoded_lines) {
770 /* As above, except that we want to provoke an output buffer
771 * overflow with a non-trivial encoding. For this we need to pass
772 * the whole cdata in one go, not byte-by-byte.
773 */
774 void *buffer;
775 const char *text
776 = "<?xml version='1.0' encoding='iso-8859-1'?>"
777 "<e>"
778 /* 64 chars */
779 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
780 /* until we have at least 1024 characters on the line: */
781 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
782 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
783 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
784 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
785 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
786 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
787 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
788 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
789 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
790 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
791 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
792 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
793 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
794 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
795 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
796 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
797 "</e>";
798 int parse_len = (int)strlen(text);
799
800 /* Need a cdata handler to provoke the code path we want to test */
801 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
802 buffer = XML_GetBuffer(g_parser, parse_len);
803 if (buffer == NULL)
804 fail("Could not allocate parse buffer");
805 assert(buffer != NULL);
806 memcpy(buffer, text, parse_len);
807 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
808 xml_failure(g_parser);
809 }
810 END_TEST
811
812 /*
813 * Element event tests.
814 */
815
START_TEST(test_end_element_events)816 START_TEST(test_end_element_events) {
817 const char *text = "<a><b><c/></b><d><f/></d></a>";
818 const XML_Char *expected = XCS("/c/b/f/d/a");
819 CharData storage;
820
821 CharData_Init(&storage);
822 XML_SetUserData(g_parser, &storage);
823 XML_SetEndElementHandler(g_parser, end_element_event_handler);
824 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
825 == XML_STATUS_ERROR)
826 xml_failure(g_parser);
827 CharData_CheckXMLChars(&storage, expected);
828 }
829 END_TEST
830
831 /*
832 * Attribute tests.
833 */
834
835 /* Helper used by the following tests; this checks any "attr" and "refs"
836 attributes to make sure whitespace has been normalized.
837
838 Return true if whitespace has been normalized in a string, using
839 the rules for attribute value normalization. The 'is_cdata' flag
840 is needed since CDATA attributes don't need to have multiple
841 whitespace characters collapsed to a single space, while other
842 attribute data types do. (Section 3.3.3 of the recommendation.)
843 */
844 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)845 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
846 int blanks = 0;
847 int at_start = 1;
848 while (*s) {
849 if (*s == XCS(' '))
850 ++blanks;
851 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
852 return 0;
853 else {
854 if (at_start) {
855 at_start = 0;
856 if (blanks && ! is_cdata)
857 /* illegal leading blanks */
858 return 0;
859 } else if (blanks > 1 && ! is_cdata)
860 return 0;
861 blanks = 0;
862 }
863 ++s;
864 }
865 if (blanks && ! is_cdata)
866 return 0;
867 return 1;
868 }
869
870 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)871 START_TEST(test_helper_is_whitespace_normalized) {
872 assert(is_whitespace_normalized(XCS("abc"), 0));
873 assert(is_whitespace_normalized(XCS("abc"), 1));
874 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
875 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
876 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
877 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
878 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
879 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
880 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
881 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
882 assert(! is_whitespace_normalized(XCS(" "), 0));
883 assert(is_whitespace_normalized(XCS(" "), 1));
884 assert(! is_whitespace_normalized(XCS("\t"), 0));
885 assert(! is_whitespace_normalized(XCS("\t"), 1));
886 assert(! is_whitespace_normalized(XCS("\n"), 0));
887 assert(! is_whitespace_normalized(XCS("\n"), 1));
888 assert(! is_whitespace_normalized(XCS("\r"), 0));
889 assert(! is_whitespace_normalized(XCS("\r"), 1));
890 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
891 }
892 END_TEST
893
894 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)895 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
896 const XML_Char **atts) {
897 int i;
898 UNUSED_P(userData);
899 UNUSED_P(name);
900 for (i = 0; atts[i] != NULL; i += 2) {
901 const XML_Char *attrname = atts[i];
902 const XML_Char *value = atts[i + 1];
903 if (xcstrcmp(XCS("attr"), attrname) == 0
904 || xcstrcmp(XCS("ents"), attrname) == 0
905 || xcstrcmp(XCS("refs"), attrname) == 0) {
906 if (! is_whitespace_normalized(value, 0)) {
907 char buffer[256];
908 snprintf(buffer, sizeof(buffer),
909 "attribute value not normalized: %" XML_FMT_STR
910 "='%" XML_FMT_STR "'",
911 attrname, value);
912 fail(buffer);
913 }
914 }
915 }
916 }
917
START_TEST(test_attr_whitespace_normalization)918 START_TEST(test_attr_whitespace_normalization) {
919 const char *text
920 = "<!DOCTYPE doc [\n"
921 " <!ATTLIST doc\n"
922 " attr NMTOKENS #REQUIRED\n"
923 " ents ENTITIES #REQUIRED\n"
924 " refs IDREFS #REQUIRED>\n"
925 "]>\n"
926 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
927 " ents=' ent-1 \t\r\n"
928 " ent-2 ' >\n"
929 " <e id='id-1'/>\n"
930 " <e id='id-2'/>\n"
931 "</doc>";
932
933 XML_SetStartElementHandler(g_parser,
934 check_attr_contains_normalized_whitespace);
935 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
936 == XML_STATUS_ERROR)
937 xml_failure(g_parser);
938 }
939 END_TEST
940
941 /*
942 * XML declaration tests.
943 */
944
START_TEST(test_xmldecl_misplaced)945 START_TEST(test_xmldecl_misplaced) {
946 expect_failure("\n"
947 "<?xml version='1.0'?>\n"
948 "<a/>",
949 XML_ERROR_MISPLACED_XML_PI,
950 "failed to report misplaced XML declaration");
951 }
952 END_TEST
953
START_TEST(test_xmldecl_invalid)954 START_TEST(test_xmldecl_invalid) {
955 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
956 "Failed to report invalid XML declaration");
957 }
958 END_TEST
959
START_TEST(test_xmldecl_missing_attr)960 START_TEST(test_xmldecl_missing_attr) {
961 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
962 "Failed to report missing XML declaration attribute");
963 }
964 END_TEST
965
START_TEST(test_xmldecl_missing_value)966 START_TEST(test_xmldecl_missing_value) {
967 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
968 "<doc/>",
969 XML_ERROR_XML_DECL,
970 "Failed to report missing attribute value");
971 }
972 END_TEST
973
974 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)975 START_TEST(test_unknown_encoding_internal_entity) {
976 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
977 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
978 "<test a='&foo;'/>";
979
980 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
981 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
982 == XML_STATUS_ERROR)
983 xml_failure(g_parser);
984 }
985 END_TEST
986
987 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)988 START_TEST(test_unrecognised_encoding_internal_entity) {
989 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
990 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
991 "<test a='&foo;'/>";
992
993 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
994 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
995 != XML_STATUS_ERROR)
996 fail("Unrecognised encoding not rejected");
997 }
998 END_TEST
999
1000 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)1001 START_TEST(test_ext_entity_set_encoding) {
1002 const char *text = "<!DOCTYPE doc [\n"
1003 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1004 "]>\n"
1005 "<doc>&en;</doc>";
1006 ExtTest test_data
1007 = {/* This text says it's an unsupported encoding, but it's really
1008 UTF-8, which we tell Expat using XML_SetEncoding().
1009 */
1010 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
1011 #ifdef XML_UNICODE
1012 const XML_Char *expected = XCS("\x00e9");
1013 #else
1014 const XML_Char *expected = XCS("\xc3\xa9");
1015 #endif
1016
1017 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1018 run_ext_character_check(text, &test_data, expected);
1019 }
1020 END_TEST
1021
1022 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)1023 START_TEST(test_ext_entity_no_handler) {
1024 const char *text = "<!DOCTYPE doc [\n"
1025 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1026 "]>\n"
1027 "<doc>&en;</doc>";
1028
1029 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1030 run_character_check(text, XCS(""));
1031 }
1032 END_TEST
1033
1034 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1035 START_TEST(test_ext_entity_set_bom) {
1036 const char *text = "<!DOCTYPE doc [\n"
1037 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1038 "]>\n"
1039 "<doc>&en;</doc>";
1040 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1041 "<?xml encoding='iso-8859-3'?>"
1042 "\xC3\xA9",
1043 XCS("utf-8"), NULL};
1044 #ifdef XML_UNICODE
1045 const XML_Char *expected = XCS("\x00e9");
1046 #else
1047 const XML_Char *expected = XCS("\xc3\xa9");
1048 #endif
1049
1050 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1051 run_ext_character_check(text, &test_data, expected);
1052 }
1053 END_TEST
1054
1055 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1056 START_TEST(test_ext_entity_bad_encoding) {
1057 const char *text = "<!DOCTYPE doc [\n"
1058 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1059 "]>\n"
1060 "<doc>&en;</doc>";
1061 ExtFaults fault
1062 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1063 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1064
1065 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1066 XML_SetUserData(g_parser, &fault);
1067 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1068 "Bad encoding should not have been accepted");
1069 }
1070 END_TEST
1071
1072 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1073 START_TEST(test_ext_entity_bad_encoding_2) {
1074 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1075 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1076 "<doc>&entity;</doc>";
1077 ExtFaults fault
1078 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1079 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1080
1081 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1082 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1083 XML_SetUserData(g_parser, &fault);
1084 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1085 "Bad encoding not faulted in external entity handler");
1086 }
1087 END_TEST
1088
1089 /* Test that no error is reported for unknown entities if we don't
1090 read an external subset. This was fixed in Expat 1.95.5.
1091 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1092 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1093 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094 "<doc>&entity;</doc>";
1095
1096 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1097 == XML_STATUS_ERROR)
1098 xml_failure(g_parser);
1099 }
1100 END_TEST
1101
1102 /* Test that an error is reported for unknown entities if we don't
1103 have an external subset.
1104 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1105 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1106 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1107 "Parser did not report undefined entity w/out a DTD.");
1108 }
1109 END_TEST
1110
1111 /* Test that an error is reported for unknown entities if we don't
1112 read an external subset, but have been declared standalone.
1113 */
START_TEST(test_wfc_undeclared_entity_standalone)1114 START_TEST(test_wfc_undeclared_entity_standalone) {
1115 const char *text
1116 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1117 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1118 "<doc>&entity;</doc>";
1119
1120 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1121 "Parser did not report undefined entity (standalone).");
1122 }
1123 END_TEST
1124
1125 /* Test that an error is reported for unknown entities if we have read
1126 an external subset, and standalone is true.
1127 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1128 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1129 const char *text
1130 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1131 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1132 "<doc>&entity;</doc>";
1133 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1134
1135 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1136 XML_SetUserData(g_parser, &test_data);
1137 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1138 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1139 "Parser did not report undefined entity (external DTD).");
1140 }
1141 END_TEST
1142
1143 /* Test that external entity handling is not done if the parsing flag
1144 * is set to UNLESS_STANDALONE
1145 */
START_TEST(test_entity_with_external_subset_unless_standalone)1146 START_TEST(test_entity_with_external_subset_unless_standalone) {
1147 const char *text
1148 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1149 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1150 "<doc>&entity;</doc>";
1151 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1152
1153 XML_SetParamEntityParsing(g_parser,
1154 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1155 XML_SetUserData(g_parser, &test_data);
1156 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1157 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1158 "Parser did not report undefined entity");
1159 }
1160 END_TEST
1161
1162 /* Test that no error is reported for unknown entities if we have read
1163 an external subset, and standalone is false.
1164 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1165 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1166 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1167 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1168 "<doc>&entity;</doc>";
1169 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1170
1171 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1172 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1173 run_ext_character_check(text, &test_data, XCS(""));
1174 }
1175 END_TEST
1176
1177 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1178 START_TEST(test_not_standalone_handler_reject) {
1179 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1180 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1181 "<doc>&entity;</doc>";
1182 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1183
1184 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1185 XML_SetUserData(g_parser, &test_data);
1186 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1187 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1188 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1189 "NotStandalone handler failed to reject");
1190
1191 /* Try again but without external entity handling */
1192 XML_ParserReset(g_parser, NULL);
1193 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1194 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1195 "NotStandalone handler failed to reject");
1196 }
1197 END_TEST
1198
1199 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1200 START_TEST(test_not_standalone_handler_accept) {
1201 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1202 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1203 "<doc>&entity;</doc>";
1204 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1205
1206 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1207 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1208 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1209 run_ext_character_check(text, &test_data, XCS(""));
1210
1211 /* Repeat without the external entity handler */
1212 XML_ParserReset(g_parser, NULL);
1213 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1214 run_character_check(text, XCS(""));
1215 }
1216 END_TEST
1217
START_TEST(test_entity_start_tag_level_greater_than_one)1218 START_TEST(test_entity_start_tag_level_greater_than_one) {
1219 const char *const text = "<!DOCTYPE t1 [\n"
1220 " <!ENTITY e1 'hello'>\n"
1221 "]>\n"
1222 "<t1>\n"
1223 " <t2>&e1;</t2>\n"
1224 "</t1>\n";
1225
1226 XML_Parser parser = XML_ParserCreate(NULL);
1227 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
1228 /*isFinal*/ XML_TRUE)
1229 == XML_STATUS_OK);
1230 XML_ParserFree(parser);
1231 }
1232 END_TEST
1233
START_TEST(test_wfc_no_recursive_entity_refs)1234 START_TEST(test_wfc_no_recursive_entity_refs) {
1235 const char *text = "<!DOCTYPE doc [\n"
1236 " <!ENTITY entity '&entity;'>\n"
1237 "]>\n"
1238 "<doc>&entity;</doc>";
1239
1240 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1241 "Parser did not report recursive entity reference.");
1242 }
1243 END_TEST
1244
START_TEST(test_no_indirectly_recursive_entity_refs)1245 START_TEST(test_no_indirectly_recursive_entity_refs) {
1246 struct TestCase {
1247 const char *doc;
1248 bool usesParameterEntities;
1249 };
1250
1251 const struct TestCase cases[] = {
1252 // general entity + character data
1253 {"<!DOCTYPE a [\n"
1254 " <!ENTITY e1 '&e2;'>\n"
1255 " <!ENTITY e2 '&e1;'>\n"
1256 "]><a>&e2;</a>\n",
1257 false},
1258
1259 // general entity + attribute value
1260 {"<!DOCTYPE a [\n"
1261 " <!ENTITY e1 '&e2;'>\n"
1262 " <!ENTITY e2 '&e1;'>\n"
1263 "]><a k1='&e2;' />\n",
1264 false},
1265
1266 // parameter entity
1267 {"<!DOCTYPE doc [\n"
1268 " <!ENTITY % p1 '%p2;'>\n"
1269 " <!ENTITY % p2 '%p1;'>\n"
1270 " <!ENTITY % define_g \"<!ENTITY g '%p2;'>\">\n"
1271 " %define_g;\n"
1272 "]>\n"
1273 "<doc/>\n",
1274 true},
1275 };
1276 const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
1277
1278 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1279 for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
1280 j++) {
1281 const XML_Bool reset_wanted = reset_or_not[j];
1282 const char *const doc = cases[i].doc;
1283 const bool usesParameterEntities = cases[i].usesParameterEntities;
1284
1285 set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
1286
1287 #ifdef XML_DTD // both GE and DTD
1288 const bool rejection_expected = true;
1289 #elif XML_GE == 1 // GE but not DTD
1290 const bool rejection_expected = ! usesParameterEntities;
1291 #else // neither DTD nor GE
1292 const bool rejection_expected = false;
1293 #endif
1294
1295 XML_Parser parser = XML_ParserCreate(NULL);
1296
1297 #ifdef XML_DTD
1298 if (usesParameterEntities) {
1299 assert_true(
1300 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
1301 == 1);
1302 }
1303 #else
1304 UNUSED_P(usesParameterEntities);
1305 #endif // XML_DTD
1306
1307 const enum XML_Status status
1308 = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
1309 /*isFinal*/ XML_TRUE);
1310
1311 if (rejection_expected) {
1312 assert_true(status == XML_STATUS_ERROR);
1313 assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
1314 } else {
1315 assert_true(status == XML_STATUS_OK);
1316 }
1317
1318 if (reset_wanted) {
1319 // This covers free'ing of (eventually) all three open entity lists by
1320 // XML_ParserReset.
1321 XML_ParserReset(parser, NULL);
1322 }
1323
1324 // This covers free'ing of (eventually) all three open entity lists by
1325 // XML_ParserFree (unless XML_ParserReset has already done that above).
1326 XML_ParserFree(parser);
1327 }
1328 }
1329 }
1330 END_TEST
1331
START_TEST(test_recursive_external_parameter_entity_2)1332 START_TEST(test_recursive_external_parameter_entity_2) {
1333 struct TestCase {
1334 const char *doc;
1335 enum XML_Status expectedStatus;
1336 };
1337
1338 struct TestCase cases[] = {
1339 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1340 {"<!ENTITY % p1 '%p1;'>"
1341 "<!ENTITY % p1 'first declaration wins'>",
1342 XML_STATUS_ERROR},
1343 {"<!ENTITY % p1 'first declaration wins'>"
1344 "<!ENTITY % p1 '%p1;'>",
1345 XML_STATUS_OK},
1346 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK},
1347 };
1348
1349 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1350 const char *const doc = cases[i].doc;
1351 const enum XML_Status expectedStatus = cases[i].expectedStatus;
1352 set_subtest("%s", doc);
1353
1354 XML_Parser parser = XML_ParserCreate(NULL);
1355 assert_true(parser != NULL);
1356
1357 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1358 assert_true(ext_parser != NULL);
1359
1360 const enum XML_Status actualStatus
1361 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1362
1363 assert_true(actualStatus == expectedStatus);
1364 if (actualStatus != XML_STATUS_OK) {
1365 assert_true(XML_GetErrorCode(ext_parser)
1366 == XML_ERROR_RECURSIVE_ENTITY_REF);
1367 }
1368
1369 XML_ParserFree(ext_parser);
1370 XML_ParserFree(parser);
1371 }
1372 }
1373 END_TEST
1374
1375 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1376 START_TEST(test_ext_entity_invalid_parse) {
1377 const char *text = "<!DOCTYPE doc [\n"
1378 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1379 "]>\n"
1380 "<doc>&en;</doc>";
1381 const ExtFaults faults[]
1382 = {{"<", "Incomplete element declaration not faulted", NULL,
1383 XML_ERROR_UNCLOSED_TOKEN},
1384 {"<\xe2\x82", /* First two bytes of a three-byte char */
1385 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1386 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1387 XML_ERROR_PARTIAL_CHAR},
1388 {NULL, NULL, NULL, XML_ERROR_NONE}};
1389 const ExtFaults *fault = faults;
1390
1391 for (; fault->parse_text != NULL; fault++) {
1392 set_subtest("\"%s\"", fault->parse_text);
1393 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1394 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1395 XML_SetUserData(g_parser, (void *)fault);
1396 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1397 "Parser did not report external entity error");
1398 XML_ParserReset(g_parser, NULL);
1399 }
1400 }
1401 END_TEST
1402
1403 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1404 START_TEST(test_dtd_default_handling) {
1405 const char *text = "<!DOCTYPE doc [\n"
1406 "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1407 "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1408 "<!ELEMENT doc EMPTY>\n"
1409 "<!ATTLIST doc a CDATA #IMPLIED>\n"
1410 "<?pi in dtd?>\n"
1411 "<!--comment in dtd-->\n"
1412 "]><doc/>";
1413
1414 XML_SetDefaultHandler(g_parser, accumulate_characters);
1415 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1416 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1417 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1418 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1419 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1420 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1421 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1422 XML_SetCommentHandler(g_parser, dummy_comment_handler);
1423 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1424 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1425 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1426 }
1427 END_TEST
1428
1429 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1430 START_TEST(test_dtd_attr_handling) {
1431 const char *prolog = "<!DOCTYPE doc [\n"
1432 "<!ELEMENT doc EMPTY>\n";
1433 AttTest attr_data[]
1434 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1435 "]>"
1436 "<doc a='two'/>",
1437 XCS("doc"), XCS("a"),
1438 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1439 NULL, XML_TRUE},
1440 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1441 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1442 "]>"
1443 "<doc/>",
1444 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1445 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1446 "]>"
1447 "<doc/>",
1448 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1449 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1450 "]>"
1451 "<doc/>",
1452 XCS("doc"), XCS("a"), XCS("CDATA"),
1453 #ifdef XML_UNICODE
1454 XCS("\x06f2"),
1455 #else
1456 XCS("\xdb\xb2"),
1457 #endif
1458 XML_FALSE},
1459 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1460 AttTest *test;
1461
1462 for (test = attr_data; test->definition != NULL; test++) {
1463 set_subtest("%s", test->definition);
1464 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1465 XML_SetUserData(g_parser, test);
1466 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1467 XML_FALSE)
1468 == XML_STATUS_ERROR)
1469 xml_failure(g_parser);
1470 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1471 (int)strlen(test->definition), XML_TRUE)
1472 == XML_STATUS_ERROR)
1473 xml_failure(g_parser);
1474 XML_ParserReset(g_parser, NULL);
1475 }
1476 }
1477 END_TEST
1478
1479 /* See related SF bug #673791.
1480 When namespace processing is enabled, setting the namespace URI for
1481 a prefix is not allowed; this test ensures that it *is* allowed
1482 when namespace processing is not enabled.
1483 (See Namespaces in XML, section 2.)
1484 */
START_TEST(test_empty_ns_without_namespaces)1485 START_TEST(test_empty_ns_without_namespaces) {
1486 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1487 " <e xmlns:prefix=''/>\n"
1488 "</doc>";
1489
1490 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1491 == XML_STATUS_ERROR)
1492 xml_failure(g_parser);
1493 }
1494 END_TEST
1495
1496 /* Regression test for SF bug #824420.
1497 Checks that an xmlns:prefix attribute set in an attribute's default
1498 value isn't misinterpreted.
1499 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1500 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1501 const char *text = "<!DOCTYPE e:element [\n"
1502 " <!ATTLIST e:element\n"
1503 " xmlns:e CDATA 'http://example.org/'>\n"
1504 " ]>\n"
1505 "<e:element/>";
1506
1507 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1508 == XML_STATUS_ERROR)
1509 xml_failure(g_parser);
1510 }
1511 END_TEST
1512
1513 /* Regression test for SF bug #1515266: missing check of stopped
1514 parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1515 START_TEST(test_stop_parser_between_char_data_calls) {
1516 /* The sample data must be big enough that there are two calls to
1517 the character data handler from within the inner "for" loop of
1518 the XML_TOK_DATA_CHARS case in doContent(), and the character
1519 handler must stop the parser and clear the character data
1520 handler.
1521 */
1522 const char *text = long_character_data_text;
1523
1524 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1525 g_resumable = XML_FALSE;
1526 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1527 != XML_STATUS_ERROR)
1528 xml_failure(g_parser);
1529 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1530 xml_failure(g_parser);
1531 }
1532 END_TEST
1533
1534 /* Regression test for SF bug #1515266: missing check of stopped
1535 parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1536 START_TEST(test_suspend_parser_between_char_data_calls) {
1537 /* The sample data must be big enough that there are two calls to
1538 the character data handler from within the inner "for" loop of
1539 the XML_TOK_DATA_CHARS case in doContent(), and the character
1540 handler must stop the parser and clear the character data
1541 handler.
1542 */
1543 const char *text = long_character_data_text;
1544
1545 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1546 g_resumable = XML_TRUE;
1547 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1548 // we won't know exactly how much input we actually managed to give Expat.
1549 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1550 != XML_STATUS_SUSPENDED)
1551 xml_failure(g_parser);
1552 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1553 xml_failure(g_parser);
1554 /* Try parsing directly */
1555 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1556 != XML_STATUS_ERROR)
1557 fail("Attempt to continue parse while suspended not faulted");
1558 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1559 fail("Suspended parse not faulted with correct error");
1560 }
1561 END_TEST
1562
1563 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1564 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1565 const char *text = long_character_data_text;
1566
1567 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1568 g_resumable = XML_FALSE;
1569 g_abortable = XML_FALSE;
1570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1571 != XML_STATUS_ERROR)
1572 fail("Failed to double-stop parser");
1573
1574 XML_ParserReset(g_parser, NULL);
1575 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1576 g_resumable = XML_TRUE;
1577 g_abortable = XML_FALSE;
1578 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1579 // we won't know exactly how much input we actually managed to give Expat.
1580 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1581 != XML_STATUS_SUSPENDED)
1582 fail("Failed to double-suspend parser");
1583
1584 XML_ParserReset(g_parser, NULL);
1585 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1586 g_resumable = XML_TRUE;
1587 g_abortable = XML_TRUE;
1588 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1589 != XML_STATUS_ERROR)
1590 fail("Failed to suspend-abort parser");
1591 }
1592 END_TEST
1593
START_TEST(test_good_cdata_ascii)1594 START_TEST(test_good_cdata_ascii) {
1595 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1596 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1597
1598 CharData storage;
1599 CharData_Init(&storage);
1600 XML_SetUserData(g_parser, &storage);
1601 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1602 /* Add start and end handlers for coverage */
1603 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1604 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1605
1606 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1607 == XML_STATUS_ERROR)
1608 xml_failure(g_parser);
1609 CharData_CheckXMLChars(&storage, expected);
1610
1611 /* Try again, this time with a default handler */
1612 XML_ParserReset(g_parser, NULL);
1613 CharData_Init(&storage);
1614 XML_SetUserData(g_parser, &storage);
1615 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1616 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1617
1618 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1619 == XML_STATUS_ERROR)
1620 xml_failure(g_parser);
1621 CharData_CheckXMLChars(&storage, expected);
1622 }
1623 END_TEST
1624
START_TEST(test_good_cdata_utf16)1625 START_TEST(test_good_cdata_utf16) {
1626 /* Test data is:
1627 * <?xml version='1.0' encoding='utf-16'?>
1628 * <a><![CDATA[hello]]></a>
1629 */
1630 const char text[]
1631 = "\0<\0?\0x\0m\0l\0"
1632 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1633 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1634 "1\0"
1635 "6\0'"
1636 "\0?\0>\0\n"
1637 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1638 const XML_Char *expected = XCS("hello");
1639
1640 CharData storage;
1641 CharData_Init(&storage);
1642 XML_SetUserData(g_parser, &storage);
1643 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1644
1645 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1646 == XML_STATUS_ERROR)
1647 xml_failure(g_parser);
1648 CharData_CheckXMLChars(&storage, expected);
1649 }
1650 END_TEST
1651
START_TEST(test_good_cdata_utf16_le)1652 START_TEST(test_good_cdata_utf16_le) {
1653 /* Test data is:
1654 * <?xml version='1.0' encoding='utf-16'?>
1655 * <a><![CDATA[hello]]></a>
1656 */
1657 const char text[]
1658 = "<\0?\0x\0m\0l\0"
1659 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1660 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1661 "1\0"
1662 "6\0'"
1663 "\0?\0>\0\n"
1664 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1665 const XML_Char *expected = XCS("hello");
1666
1667 CharData storage;
1668 CharData_Init(&storage);
1669 XML_SetUserData(g_parser, &storage);
1670 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1671
1672 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1673 == XML_STATUS_ERROR)
1674 xml_failure(g_parser);
1675 CharData_CheckXMLChars(&storage, expected);
1676 }
1677 END_TEST
1678
1679 /* Test UTF16 conversion of a long cdata string */
1680
1681 /* 16 characters: handy macro to reduce visual clutter */
1682 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1683
START_TEST(test_long_cdata_utf16)1684 START_TEST(test_long_cdata_utf16) {
1685 /* Test data is:
1686 * <?xlm version='1.0' encoding='utf-16'?>
1687 * <a><![CDATA[
1688 * ABCDEFGHIJKLMNOP
1689 * ]]></a>
1690 */
1691 const char text[]
1692 = "\0<\0?\0x\0m\0l\0 "
1693 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1694 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1695 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1696 /* 64 characters per line */
1697 /* clang-format off */
1698 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1699 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1700 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1701 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1702 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1703 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1704 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1705 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1706 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1707 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1708 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1709 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1710 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1711 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1712 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1713 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1714 A_TO_P_IN_UTF16
1715 /* clang-format on */
1716 "\0]\0]\0>\0<\0/\0a\0>";
1717 const XML_Char *expected =
1718 /* clang-format off */
1719 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1720 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1721 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1722 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1723 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1724 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1725 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1726 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1727 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1728 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1729 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1730 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1731 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1732 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1733 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1734 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1735 XCS("ABCDEFGHIJKLMNOP");
1736 /* clang-format on */
1737 CharData storage;
1738 void *buffer;
1739
1740 CharData_Init(&storage);
1741 XML_SetUserData(g_parser, &storage);
1742 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1743 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1744 if (buffer == NULL)
1745 fail("Could not allocate parse buffer");
1746 assert(buffer != NULL);
1747 memcpy(buffer, text, sizeof(text) - 1);
1748 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1749 xml_failure(g_parser);
1750 CharData_CheckXMLChars(&storage, expected);
1751 }
1752 END_TEST
1753
1754 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1755 START_TEST(test_multichar_cdata_utf16) {
1756 /* Test data is:
1757 * <?xml version='1.0' encoding='utf-16'?>
1758 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1759 *
1760 * where {MINIM} is U+1d15e (a minim or half-note)
1761 * UTF-16: 0xd834 0xdd5e
1762 * UTF-8: 0xf0 0x9d 0x85 0x9e
1763 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1764 * UTF-16: 0xd834 0xdd5f
1765 * UTF-8: 0xf0 0x9d 0x85 0x9f
1766 */
1767 const char text[] = "\0<\0?\0x\0m\0l\0"
1768 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1769 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1770 "1\0"
1771 "6\0'"
1772 "\0?\0>\0\n"
1773 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1774 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1775 "\0]\0]\0>\0<\0/\0a\0>";
1776 #ifdef XML_UNICODE
1777 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1778 #else
1779 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1780 #endif
1781 CharData storage;
1782
1783 CharData_Init(&storage);
1784 XML_SetUserData(g_parser, &storage);
1785 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1786
1787 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1788 == XML_STATUS_ERROR)
1789 xml_failure(g_parser);
1790 CharData_CheckXMLChars(&storage, expected);
1791 }
1792 END_TEST
1793
/* Test that a reversed UTF-16 surrogate pair in CDATA is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1795 START_TEST(test_utf16_bad_surrogate_pair) {
1796 /* Test data is:
1797 * <?xml version='1.0' encoding='utf-16'?>
1798 * <a><![CDATA[{BADLINB}]]></a>
1799 *
1800 * where {BADLINB} is U+10000 (the first Linear B character)
1801 * with the UTF-16 surrogate pair in the wrong order, i.e.
1802 * 0xdc00 0xd800
1803 */
1804 const char text[] = "\0<\0?\0x\0m\0l\0"
1805 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1806 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1807 "1\0"
1808 "6\0'"
1809 "\0?\0>\0\n"
1810 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1811 "\xdc\x00\xd8\x00"
1812 "\0]\0]\0>\0<\0/\0a\0>";
1813
1814 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1815 != XML_STATUS_ERROR)
1816 fail("Reversed UTF-16 surrogate pair not faulted");
1817 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1818 xml_failure(g_parser);
1819 }
1820 END_TEST
1821
START_TEST(test_bad_cdata)1822 START_TEST(test_bad_cdata) {
1823 struct CaseData {
1824 const char *text;
1825 enum XML_Error expectedError;
1826 };
1827
1828 struct CaseData cases[]
1829 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1830 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1831 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1832 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1833 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1834 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1835 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1836 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1837
1838 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1839 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1840 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1841
1842 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1843 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1844 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1845 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1846 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1847 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1848 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1849
1850 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1851 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1852 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1853
1854 size_t i = 0;
1855 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1856 set_subtest("%s", cases[i].text);
1857 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1858 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1859 const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1860
1861 assert(actualStatus == XML_STATUS_ERROR);
1862
1863 if (actualError != cases[i].expectedError) {
1864 char message[100];
1865 snprintf(message, sizeof(message),
1866 "Expected error %d but got error %d for case %u: \"%s\"\n",
1867 cases[i].expectedError, actualError, (unsigned int)i + 1,
1868 cases[i].text);
1869 fail(message);
1870 }
1871
1872 XML_ParserReset(g_parser, NULL);
1873 }
1874 }
1875 END_TEST
1876
1877 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1878 START_TEST(test_bad_cdata_utf16) {
1879 struct CaseData {
1880 size_t text_bytes;
1881 const char *text;
1882 enum XML_Error expected_error;
1883 };
1884
1885 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1886 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1887 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1888 "1\0"
1889 "6\0'"
1890 "\0?\0>\0\n"
1891 "\0<\0a\0>";
1892 struct CaseData cases[] = {
1893 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1894 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1895 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1896 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1897 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1898 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1899 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1900 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1901 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1902 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1903 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1904 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1905 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1906 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1907 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1908 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1909 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1910 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1911 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1912 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1913 /* Now add a four-byte UTF-16 character */
1914 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1915 XML_ERROR_UNCLOSED_CDATA_SECTION},
1916 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1917 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1918 XML_ERROR_PARTIAL_CHAR},
1919 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1920 XML_ERROR_UNCLOSED_CDATA_SECTION}};
1921 size_t i;
1922
1923 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1924 set_subtest("case %lu", (long unsigned)(i + 1));
1925 enum XML_Status actual_status;
1926 enum XML_Error actual_error;
1927
1928 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1929 XML_FALSE)
1930 == XML_STATUS_ERROR)
1931 xml_failure(g_parser);
1932 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1933 (int)cases[i].text_bytes, XML_TRUE);
1934 assert(actual_status == XML_STATUS_ERROR);
1935 actual_error = XML_GetErrorCode(g_parser);
1936 if (actual_error != cases[i].expected_error) {
1937 char message[1024];
1938
1939 snprintf(message, sizeof(message),
1940 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1941 ") for case %lu\n",
1942 cases[i].expected_error,
1943 XML_ErrorString(cases[i].expected_error), actual_error,
1944 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1945 fail(message);
1946 }
1947 XML_ParserReset(g_parser, NULL);
1948 }
1949 }
1950 END_TEST
1951
1952 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1953 START_TEST(test_stop_parser_between_cdata_calls) {
1954 const char *text = long_cdata_text;
1955
1956 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1957 g_resumable = XML_FALSE;
1958 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1959 }
1960 END_TEST
1961
1962 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1963 START_TEST(test_suspend_parser_between_cdata_calls) {
1964 if (g_chunkSize != 0) {
1965 // this test does not use SINGLE_BYTES, because of suspension
1966 return;
1967 }
1968
1969 const char *text = long_cdata_text;
1970 enum XML_Status result;
1971
1972 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1973 g_resumable = XML_TRUE;
1974 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1975 // we won't know exactly how much input we actually managed to give Expat.
1976 result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
1977 if (result != XML_STATUS_SUSPENDED) {
1978 if (result == XML_STATUS_ERROR)
1979 xml_failure(g_parser);
1980 fail("Parse not suspended in CDATA handler");
1981 }
1982 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1983 xml_failure(g_parser);
1984 }
1985 END_TEST
1986
1987 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1988 START_TEST(test_memory_allocation) {
1989 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1990 char *p;
1991
1992 if (buffer == NULL) {
1993 fail("Allocation failed");
1994 } else {
1995 /* Try writing to memory; some OSes try to cheat! */
1996 buffer[0] = 'T';
1997 buffer[1] = 'E';
1998 buffer[2] = 'S';
1999 buffer[3] = 'T';
2000 buffer[4] = '\0';
2001 if (strcmp(buffer, "TEST") != 0) {
2002 fail("Memory not writable");
2003 } else {
2004 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
2005 if (p == NULL) {
2006 fail("Reallocation failed");
2007 } else {
2008 /* Write again, just to be sure */
2009 buffer = p;
2010 buffer[0] = 'V';
2011 if (strcmp(buffer, "VEST") != 0) {
2012 fail("Reallocated memory not writable");
2013 }
2014 }
2015 }
2016 XML_MemFree(g_parser, buffer);
2017 }
2018 }
2019 END_TEST
2020
2021 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)2022 START_TEST(test_default_current) {
2023 const char *text = "<doc>hell]</doc>";
2024 const char *entity_text = "<!DOCTYPE doc [\n"
2025 "<!ENTITY entity '%'>\n"
2026 "]>\n"
2027 "<doc>&entity;</doc>";
2028
2029 set_subtest("with defaulting");
2030 {
2031 struct handler_record_list storage;
2032 storage.count = 0;
2033 XML_SetDefaultHandler(g_parser, record_default_handler);
2034 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2035 XML_SetUserData(g_parser, &storage);
2036 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2037 == XML_STATUS_ERROR)
2038 xml_failure(g_parser);
2039 int i = 0;
2040 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2041 // we should have gotten one or more cdata callbacks, totaling 5 chars
2042 int cdata_len_remaining = 5;
2043 while (cdata_len_remaining > 0) {
2044 const struct handler_record_entry *c_entry
2045 = handler_record_get(&storage, i++);
2046 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
2047 assert_true(c_entry->arg > 0);
2048 assert_true(c_entry->arg <= cdata_len_remaining);
2049 cdata_len_remaining -= c_entry->arg;
2050 // default handler must follow, with the exact same len argument.
2051 assert_record_handler_called(&storage, i++, "record_default_handler",
2052 c_entry->arg);
2053 }
2054 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2055 assert_true(storage.count == i);
2056 }
2057
2058 /* Again, without the defaulting */
2059 set_subtest("no defaulting");
2060 {
2061 struct handler_record_list storage;
2062 storage.count = 0;
2063 XML_ParserReset(g_parser, NULL);
2064 XML_SetDefaultHandler(g_parser, record_default_handler);
2065 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2066 XML_SetUserData(g_parser, &storage);
2067 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2068 == XML_STATUS_ERROR)
2069 xml_failure(g_parser);
2070 int i = 0;
2071 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2072 // we should have gotten one or more cdata callbacks, totaling 5 chars
2073 int cdata_len_remaining = 5;
2074 while (cdata_len_remaining > 0) {
2075 const struct handler_record_entry *c_entry
2076 = handler_record_get(&storage, i++);
2077 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
2078 assert_true(c_entry->arg > 0);
2079 assert_true(c_entry->arg <= cdata_len_remaining);
2080 cdata_len_remaining -= c_entry->arg;
2081 }
2082 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2083 assert_true(storage.count == i);
2084 }
2085
2086 /* Now with an internal entity to complicate matters */
2087 set_subtest("with internal entity");
2088 {
2089 struct handler_record_list storage;
2090 storage.count = 0;
2091 XML_ParserReset(g_parser, NULL);
2092 XML_SetDefaultHandler(g_parser, record_default_handler);
2093 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2094 XML_SetUserData(g_parser, &storage);
2095 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2096 XML_TRUE)
2097 == XML_STATUS_ERROR)
2098 xml_failure(g_parser);
2099 /* The default handler suppresses the entity */
2100 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2101 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2102 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2103 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2104 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2105 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2106 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2107 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2108 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2109 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2110 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2111 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2112 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2113 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2114 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2115 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2116 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2117 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
2118 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2119 assert_true(storage.count == 19);
2120 }
2121
2122 /* Again, with a skip handler */
2123 set_subtest("with skip handler");
2124 {
2125 struct handler_record_list storage;
2126 storage.count = 0;
2127 XML_ParserReset(g_parser, NULL);
2128 XML_SetDefaultHandler(g_parser, record_default_handler);
2129 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2130 XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
2131 XML_SetUserData(g_parser, &storage);
2132 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2133 XML_TRUE)
2134 == XML_STATUS_ERROR)
2135 xml_failure(g_parser);
2136 /* The default handler suppresses the entity */
2137 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2138 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2139 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2140 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2141 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2142 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2143 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2144 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2145 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2146 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2147 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2148 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2149 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2150 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2151 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2152 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2153 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2154 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2155 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2156 assert_true(storage.count == 19);
2157 }
2158
2159 /* This time, allow the entity through */
2160 set_subtest("allow entity");
2161 {
2162 struct handler_record_list storage;
2163 storage.count = 0;
2164 XML_ParserReset(g_parser, NULL);
2165 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2166 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2167 XML_SetUserData(g_parser, &storage);
2168 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2169 XML_TRUE)
2170 == XML_STATUS_ERROR)
2171 xml_failure(g_parser);
2172 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2173 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2174 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2175 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2176 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2177 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2178 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2179 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2180 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2181 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2182 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2183 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2184 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2185 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2186 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2187 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2188 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2189 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2190 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2191 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2192 assert_true(storage.count == 20);
2193 }
2194
2195 /* Finally, without passing the cdata to the default handler */
2196 set_subtest("not passing cdata");
2197 {
2198 struct handler_record_list storage;
2199 storage.count = 0;
2200 XML_ParserReset(g_parser, NULL);
2201 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2202 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2203 XML_SetUserData(g_parser, &storage);
2204 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2205 XML_TRUE)
2206 == XML_STATUS_ERROR)
2207 xml_failure(g_parser);
2208 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2209 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2210 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2211 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2212 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2213 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2214 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2215 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2216 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2217 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2218 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2219 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2220 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2221 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2222 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2223 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2224 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2225 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2226 1);
2227 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2228 assert_true(storage.count == 19);
2229 }
2230 }
2231 END_TEST
2232
2233 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2234 START_TEST(test_dtd_elements) {
2235 const char *text = "<!DOCTYPE doc [\n"
2236 "<!ELEMENT doc (chapter)>\n"
2237 "<!ELEMENT chapter (#PCDATA)>\n"
2238 "]>\n"
2239 "<doc><chapter>Wombats are go</chapter></doc>";
2240
2241 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2242 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2243 == XML_STATUS_ERROR)
2244 xml_failure(g_parser);
2245 }
2246 END_TEST
2247
2248 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2249 element_decl_check_model(void *userData, const XML_Char *name,
2250 XML_Content *model) {
2251 UNUSED_P(userData);
2252 uint32_t errorFlags = 0;
2253
2254 /* Expected model array structure is this:
2255 * [0] (type 6, quant 0)
2256 * [1] (type 5, quant 0)
2257 * [3] (type 4, quant 0, name "bar")
2258 * [4] (type 4, quant 0, name "foo")
2259 * [5] (type 4, quant 3, name "xyz")
2260 * [2] (type 4, quant 2, name "zebra")
2261 */
2262 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2263 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2264
2265 if (model != NULL) {
2266 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2267 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2268 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2269 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2270 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2271
2272 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2273 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2274 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2275 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2276 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2277
2278 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2279 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2280 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2281 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2282 errorFlags
2283 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2284
2285 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2286 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2287 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2288 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2289 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2290
2291 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2292 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2293 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2294 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2295 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2296
2297 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2298 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2299 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2300 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2301 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2302 }
2303
2304 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2305 XML_FreeContentModel(g_parser, model);
2306 }
2307
START_TEST(test_dtd_elements_nesting)2308 START_TEST(test_dtd_elements_nesting) {
2309 // Payload inspired by a test in Perl's XML::Parser
2310 const char *text = "<!DOCTYPE foo [\n"
2311 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2312 "]>\n"
2313 "<foo/>";
2314
2315 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2316
2317 XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2318 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2319 == XML_STATUS_ERROR)
2320 xml_failure(g_parser);
2321
2322 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2323 fail("Element declaration model regression detected");
2324 }
2325 END_TEST
2326
2327 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2328 START_TEST(test_set_foreign_dtd) {
2329 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2330 const char *text2 = "<doc>&entity;</doc>";
2331 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2332
2333 /* Check hash salt is passed through too */
2334 XML_SetHashSalt(g_parser, 0x12345678);
2335 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2336 XML_SetUserData(g_parser, &test_data);
2337 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2338 /* Add a default handler to exercise more code paths */
2339 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2340 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2341 fail("Could not set foreign DTD");
2342 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2343 == XML_STATUS_ERROR)
2344 xml_failure(g_parser);
2345
2346 /* Ensure that trying to set the DTD after parsing has started
2347 * is faulted, even if it's the same setting.
2348 */
2349 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2350 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2351 fail("Failed to reject late foreign DTD setting");
2352 /* Ditto for the hash salt */
2353 if (XML_SetHashSalt(g_parser, 0x23456789))
2354 fail("Failed to reject late hash salt change");
2355
2356 /* Now finish the parse */
2357 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2358 == XML_STATUS_ERROR)
2359 xml_failure(g_parser);
2360 }
2361 END_TEST
2362
2363 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2364 START_TEST(test_foreign_dtd_not_standalone) {
2365 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2366 "<doc>&entity;</doc>";
2367 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2368
2369 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2370 XML_SetUserData(g_parser, &test_data);
2371 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2372 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2373 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2374 fail("Could not set foreign DTD");
2375 expect_failure(text, XML_ERROR_NOT_STANDALONE,
2376 "NotStandalonehandler failed to reject");
2377 }
2378 END_TEST
2379
2380 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2381 START_TEST(test_invalid_foreign_dtd) {
2382 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2383 "<doc>&entity;</doc>";
2384 ExtFaults test_data
2385 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2386
2387 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2388 XML_SetUserData(g_parser, &test_data);
2389 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2390 XML_UseForeignDTD(g_parser, XML_TRUE);
2391 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2392 "Bad DTD should not have been accepted");
2393 }
2394 END_TEST
2395
2396 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2397 START_TEST(test_foreign_dtd_with_doctype) {
2398 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2399 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2400 const char *text2 = "<doc>&entity;</doc>";
2401 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2402
2403 /* Check hash salt is passed through too */
2404 XML_SetHashSalt(g_parser, 0x12345678);
2405 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2406 XML_SetUserData(g_parser, &test_data);
2407 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2408 /* Add a default handler to exercise more code paths */
2409 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2410 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2411 fail("Could not set foreign DTD");
2412 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2413 == XML_STATUS_ERROR)
2414 xml_failure(g_parser);
2415
2416 /* Ensure that trying to set the DTD after parsing has started
2417 * is faulted, even if it's the same setting.
2418 */
2419 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2420 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2421 fail("Failed to reject late foreign DTD setting");
2422 /* Ditto for the hash salt */
2423 if (XML_SetHashSalt(g_parser, 0x23456789))
2424 fail("Failed to reject late hash salt change");
2425
2426 /* Now finish the parse */
2427 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2428 == XML_STATUS_ERROR)
2429 xml_failure(g_parser);
2430 }
2431 END_TEST
2432
2433 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2434 START_TEST(test_foreign_dtd_without_external_subset) {
2435 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2436 "<doc>&foo;</doc>";
2437
2438 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2439 XML_SetUserData(g_parser, NULL);
2440 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2441 XML_UseForeignDTD(g_parser, XML_TRUE);
2442 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2443 == XML_STATUS_ERROR)
2444 xml_failure(g_parser);
2445 }
2446 END_TEST
2447
START_TEST(test_empty_foreign_dtd)2448 START_TEST(test_empty_foreign_dtd) {
2449 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2450 "<doc>&entity;</doc>";
2451
2452 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2453 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2454 XML_UseForeignDTD(g_parser, XML_TRUE);
2455 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2456 "Undefined entity not faulted");
2457 }
2458 END_TEST
2459
2460 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2461 START_TEST(test_set_base) {
2462 const XML_Char *old_base;
2463 const XML_Char *new_base = XCS("/local/file/name.xml");
2464
2465 old_base = XML_GetBase(g_parser);
2466 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2467 fail("Unable to set base");
2468 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2469 fail("Base setting not correct");
2470 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2471 fail("Unable to NULL base");
2472 if (XML_GetBase(g_parser) != NULL)
2473 fail("Base setting not nulled");
2474 XML_SetBase(g_parser, old_base);
2475 }
2476 END_TEST
2477
2478 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2479 START_TEST(test_attributes) {
2480 const char *text = "<!DOCTYPE doc [\n"
2481 "<!ELEMENT doc (tag)>\n"
2482 "<!ATTLIST doc id ID #REQUIRED>\n"
2483 "]>"
2484 "<doc a='1' id='one' b='2'>"
2485 "<tag c='3'/>"
2486 "</doc>";
2487 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2488 {XCS("b"), XCS("2")},
2489 {XCS("id"), XCS("one")},
2490 {NULL, NULL}};
2491 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2492 ElementInfo info[] = {{XCS("doc"), 3, 0, XCS("id"), doc_info},
2493 {XCS("tag"), 1, 0, NULL, tag_info},
2494 {NULL, 0, 0, NULL, NULL}};
2495
2496 XML_Parser parser = XML_ParserCreate(NULL);
2497 assert_true(parser != NULL);
2498 ParserAndElementInfo parserAndElementInfos = {
2499 parser,
2500 info,
2501 };
2502
2503 XML_SetStartElementHandler(parser, counting_start_element_handler);
2504 XML_SetUserData(parser, &parserAndElementInfos);
2505 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2506 == XML_STATUS_ERROR)
2507 xml_failure(parser);
2508
2509 XML_ParserFree(parser);
2510 }
2511 END_TEST
2512
START_TEST(test_duplicate_cdata_attribute)2513 START_TEST(test_duplicate_cdata_attribute) {
2514 /*
2515 https://www.w3.org/TR/xml/#attdecls
2516
2517 Test the following statement from the linked specification:
2518 When more than one definition is provided for the same attribute of a given
2519 element type, the first declaration is binding and later declarations are
2520 ignored.
2521 */
2522
2523 const char *text
2524 = "<!DOCTYPE doc [\n"
2525 " <!ATTLIST doc attribute CDATA 'expected' attribute CDATA 'ignored'>\n"
2526 "]>\n"
2527 "<doc/>\n";
2528 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
2529 ElementInfo info[]
2530 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2531
2532 XML_Parser parser = XML_ParserCreate(NULL);
2533 assert_true(parser != NULL);
2534
2535 ParserAndElementInfo parserAndElementInfos = {
2536 parser,
2537 info,
2538 };
2539
2540 XML_SetStartElementHandler(parser, counting_start_element_handler);
2541 XML_SetUserData(parser, &parserAndElementInfos);
2542
2543 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2544 != XML_STATUS_OK)
2545 xml_failure(parser);
2546
2547 XML_ParserFree(parser);
2548 }
2549 END_TEST
2550
START_TEST(test_duplicate_id_attribute_1)2551 START_TEST(test_duplicate_id_attribute_1) {
2552 /*
2553 https://www.w3.org/TR/xml/#attdecls
2554
2555 Test the following statement from the linked specification:
2556 When more than one definition is provided for the same attribute of a given
2557 element type, the first declaration is binding and later declarations are
2558 ignored.
2559 */
2560
2561 const char *text
2562 = "<!DOCTYPE doc [\n"
2563 " <!ATTLIST doc identifier CDATA 'expected' identifier ID #REQUIRED>\n"
2564 "]>\n"
2565 "<doc/>\n";
2566 AttrInfo doc_info[] = {{XCS("identifier"), XCS("expected")}, {NULL, NULL}};
2567 ElementInfo info[]
2568 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2569
2570 XML_Parser parser = XML_ParserCreate(NULL);
2571 assert_true(parser != NULL);
2572
2573 ParserAndElementInfo parserAndElementInfos = {
2574 parser,
2575 info,
2576 };
2577
2578 XML_SetStartElementHandler(parser, counting_start_element_handler);
2579 XML_SetUserData(parser, &parserAndElementInfos);
2580
2581 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2582 != XML_STATUS_OK)
2583 xml_failure(parser);
2584
2585 XML_ParserFree(parser);
2586 }
2587 END_TEST
2588
START_TEST(test_duplicate_id_attribute_2)2589 START_TEST(test_duplicate_id_attribute_2) {
2590 /*
2591 https://www.w3.org/TR/xml/#attdecls
2592
2593 Test the following statement from the linked specification:
2594 When more than one definition is provided for the same attribute of a given
2595 element type, the first declaration is binding and later declarations are
2596 ignored.
2597 */
2598
2599 const char *text
2600 = "<!DOCTYPE doc [\n"
2601 " <!ATTLIST doc identifier ID #REQUIRED identifier CDATA 'unexpected'>\n"
2602 "]>\n"
2603 "<doc/>\n";
2604 AttrInfo doc_info[] = {{NULL, NULL}};
2605
2606 ElementInfo info[]
2607 = {{XCS("doc"), 0, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2608
2609 XML_Parser parser = XML_ParserCreate(NULL);
2610 assert_true(parser != NULL);
2611
2612 ParserAndElementInfo parserAndElementInfos = {
2613 parser,
2614 info,
2615 };
2616
2617 XML_SetStartElementHandler(parser, counting_start_element_handler);
2618 XML_SetUserData(parser, &parserAndElementInfos);
2619
2620 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2621 != XML_STATUS_OK)
2622 xml_failure(parser);
2623
2624 XML_ParserFree(parser);
2625 }
2626 END_TEST
2627
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl)2628 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl) {
2629 /*
2630 https://www.w3.org/TR/xml/#attdecls
2631
2632 Test the following statement from the linked specification:
2633 When more than one AttlistDecl is provided for a given element type,
2634 the contents of all those provided are merged.
2635 */
2636 const char *text = "<!DOCTYPE doc [\n"
2637 " <!ATTLIST doc attribute CDATA 'expected'>\n"
2638 " <!ATTLIST doc attribute CDATA 'ignored'>\n"
2639 "]>\n"
2640 "<doc/>\n";
2641 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
2642 ElementInfo info[]
2643 = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2644
2645 XML_Parser parser = XML_ParserCreate(NULL);
2646 assert_true(parser != NULL);
2647
2648 ParserAndElementInfo parserAndElementInfos = {
2649 parser,
2650 info,
2651 };
2652
2653 XML_SetStartElementHandler(parser, counting_start_element_handler);
2654 XML_SetUserData(parser, &parserAndElementInfos);
2655
2656 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2657 != XML_STATUS_OK)
2658 xml_failure(parser);
2659
2660 XML_ParserFree(parser);
2661 }
2662 END_TEST
2663
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2)2664 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2) {
2665 /*
2666 https://www.w3.org/TR/xml/#attdecls
2667
2668 Test the following statement from the linked specification:
2669 When more than one AttlistDecl is provided for a given element type,
2670 the contents of all those provided are merged.
2671 */
2672 const char *text = "<!DOCTYPE doc [\n"
2673 " <!ATTLIST doc attribute CDATA 'expected_doc'>\n"
2674 " <!ATTLIST tag attribute CDATA 'expected_tag'>\n"
2675 " <!ATTLIST doc attribute CDATA 'ignored_doc'>\n"
2676 "]>\n"
2677 "<doc><tag></tag></doc>\n";
2678 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")}, {NULL, NULL}};
2679 AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}};
2680 ElementInfo info[] = {{XCS("doc"), 0, 1, NULL, doc_info},
2681 {XCS("tag"), 0, 1, NULL, tag_info},
2682 {NULL, 0, 0, NULL, NULL}};
2683
2684 XML_Parser parser = XML_ParserCreate(NULL);
2685 assert_true(parser != NULL);
2686
2687 ParserAndElementInfo parserAndElementInfos = {
2688 parser,
2689 info,
2690 };
2691
2692 XML_SetStartElementHandler(parser, counting_start_element_handler);
2693 XML_SetUserData(parser, &parserAndElementInfos);
2694
2695 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2696 != XML_STATUS_OK)
2697 xml_failure(parser);
2698
2699 XML_ParserFree(parser);
2700 }
2701 END_TEST
2702
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3)2703 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3) {
2704 /*
2705 https://www.w3.org/TR/xml/#attdecls
2706
2707 Test the following statement from the linked specification:
2708 When more than one AttlistDecl is provided for a given element type,
2709 the contents of all those provided are merged.
2710 */
2711 const char *text
2712 = "<!DOCTYPE doc [\n"
2713 " <!ATTLIST doc attribute CDATA 'expected_doc'>\n"
2714 " <!ATTLIST tag attribute CDATA 'expected_tag'>\n"
2715 " <!ATTLIST doc second_attribute CDATA 'second_expected_doc' attribute CDATA 'ignored_doc'>\n"
2716 "]>\n"
2717 "<doc><tag></tag></doc>\n";
2718 AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")},
2719 {XCS("second_attribute"), XCS("second_expected_doc")},
2720 {NULL, NULL}};
2721 AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}};
2722 ElementInfo info[] = {{XCS("doc"), 0, 2, NULL, doc_info},
2723 {XCS("tag"), 0, 1, NULL, tag_info},
2724 {NULL, 0, 0, NULL, NULL}};
2725
2726 XML_Parser parser = XML_ParserCreate(NULL);
2727 assert_true(parser != NULL);
2728
2729 ParserAndElementInfo parserAndElementInfos = {
2730 parser,
2731 info,
2732 };
2733
2734 XML_SetStartElementHandler(parser, counting_start_element_handler);
2735 XML_SetUserData(parser, &parserAndElementInfos);
2736
2737 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2738 != XML_STATUS_OK)
2739 xml_failure(parser);
2740
2741 XML_ParserFree(parser);
2742 }
2743 END_TEST
2744
START_TEST(test_duplicate_id_attribute_multiple_attlistdecl)2745 START_TEST(test_duplicate_id_attribute_multiple_attlistdecl) {
2746 /*
2747 https://www.w3.org/TR/xml/#attdecls
2748
2749 Test the following statement from the linked specification:
2750 When more than one AttlistDecl is provided for a given element type,
2751 the contents of all those provided are merged.
2752 */
2753 const char *text = "<!DOCTYPE doc [\n"
2754 " <!ATTLIST doc identifier ID #REQUIRED>\n"
2755 " <!ATTLIST tag identifier CDATA 'identifier_tag'>\n"
2756 " <!ATTLIST doc identifier CDATA 'ignored'>\n"
2757 "]>\n"
2758 "<doc identifier='doc_identity'><tag></tag></doc>\n";
2759 AttrInfo doc_info[]
2760 = {{XCS("identifier"), XCS("doc_identity")}, {NULL, NULL}};
2761 AttrInfo tag_info[]
2762 = {{XCS("identifier"), XCS("identifier_tag")}, {NULL, NULL}};
2763 ElementInfo info[] = {{XCS("doc"), 1, 0, XCS("identifier"), doc_info},
2764 {XCS("tag"), 0, 1, NULL, tag_info},
2765 {NULL, 0, 0, NULL, NULL}};
2766
2767 XML_Parser parser = XML_ParserCreate(NULL);
2768 assert_true(parser != NULL);
2769
2770 ParserAndElementInfo parserAndElementInfos = {
2771 parser,
2772 info,
2773 };
2774
2775 XML_SetStartElementHandler(parser, counting_start_element_handler);
2776 XML_SetUserData(parser, &parserAndElementInfos);
2777
2778 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2779 != XML_STATUS_OK)
2780 xml_failure(parser);
2781
2782 XML_ParserFree(parser);
2783 }
2784 END_TEST
2785
2786 /* Test reset works correctly in the middle of processing an internal
2787 * entity. Exercises some obscure code in XML_ParserReset().
2788 */
START_TEST(test_reset_in_entity)2789 START_TEST(test_reset_in_entity) {
2790 if (g_chunkSize != 0) {
2791 // this test does not use SINGLE_BYTES, because of suspension
2792 return;
2793 }
2794
2795 const char *text = "<!DOCTYPE doc [\n"
2796 "<!ENTITY wombat 'wom'>\n"
2797 "<!ENTITY entity 'hi &wom; there'>\n"
2798 "]>\n"
2799 "<doc>&entity;</doc>";
2800 XML_ParsingStatus status;
2801
2802 g_resumable = XML_TRUE;
2803 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2804 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
2805 // we won't know exactly how much input we actually managed to give Expat.
2806 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2807 == XML_STATUS_ERROR)
2808 xml_failure(g_parser);
2809 XML_GetParsingStatus(g_parser, &status);
2810 if (status.parsing != XML_SUSPENDED)
2811 fail("Parsing status not SUSPENDED");
2812 XML_ParserReset(g_parser, NULL);
2813 XML_GetParsingStatus(g_parser, &status);
2814 if (status.parsing != XML_INITIALIZED)
2815 fail("Parsing status doesn't reset to INITIALIZED");
2816 }
2817 END_TEST
2818
2819 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2820 START_TEST(test_resume_invalid_parse) {
2821 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2822
2823 g_resumable = XML_TRUE;
2824 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2825 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2826 == XML_STATUS_ERROR)
2827 xml_failure(g_parser);
2828 if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2829 fail("Resumed invalid parse not faulted");
2830 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2831 fail("Invalid parse not correctly faulted");
2832 }
2833 END_TEST
2834
2835 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2836 START_TEST(test_resume_resuspended) {
2837 const char *text = "<doc>Hello<meep/>world</doc>";
2838
2839 g_resumable = XML_TRUE;
2840 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2841 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2842 == XML_STATUS_ERROR)
2843 xml_failure(g_parser);
2844 g_resumable = XML_TRUE;
2845 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2846 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2847 fail("Resumption not suspended");
2848 /* This one should succeed and finish up */
2849 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2850 xml_failure(g_parser);
2851 }
2852 END_TEST
2853
2854 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2855 START_TEST(test_cdata_default) {
2856 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2857 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2858 CharData storage;
2859
2860 CharData_Init(&storage);
2861 XML_SetUserData(g_parser, &storage);
2862 XML_SetDefaultHandler(g_parser, accumulate_characters);
2863
2864 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2865 == XML_STATUS_ERROR)
2866 xml_failure(g_parser);
2867 CharData_CheckXMLChars(&storage, expected);
2868 }
2869 END_TEST
2870
2871 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2872 START_TEST(test_subordinate_reset) {
2873 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2874 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2875 "<doc>&entity;</doc>";
2876
2877 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2878 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2879 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2880 == XML_STATUS_ERROR)
2881 xml_failure(g_parser);
2882 }
2883 END_TEST
2884
2885 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2886 START_TEST(test_subordinate_suspend) {
2887 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2888 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2889 "<doc>&entity;</doc>";
2890
2891 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2892 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2893 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2894 == XML_STATUS_ERROR)
2895 xml_failure(g_parser);
2896 }
2897 END_TEST
2898
2899 /* Test suspending a subordinate parser from an XML declaration */
2900 /* Increases code coverage of the tests */
2901
START_TEST(test_subordinate_xdecl_suspend)2902 START_TEST(test_subordinate_xdecl_suspend) {
2903 const char *text
2904 = "<!DOCTYPE doc [\n"
2905 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2906 "]>\n"
2907 "<doc>&entity;</doc>";
2908
2909 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2910 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2911 g_resumable = XML_TRUE;
2912 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2913 == XML_STATUS_ERROR)
2914 xml_failure(g_parser);
2915 }
2916 END_TEST
2917
START_TEST(test_subordinate_xdecl_abort)2918 START_TEST(test_subordinate_xdecl_abort) {
2919 const char *text
2920 = "<!DOCTYPE doc [\n"
2921 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2922 "]>\n"
2923 "<doc>&entity;</doc>";
2924
2925 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2926 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2927 g_resumable = XML_FALSE;
2928 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2929 == XML_STATUS_ERROR)
2930 xml_failure(g_parser);
2931 }
2932 END_TEST
2933
2934 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2935 START_TEST(test_ext_entity_invalid_suspended_parse) {
2936 const char *text = "<!DOCTYPE doc [\n"
2937 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2938 "]>\n"
2939 "<doc>&en;</doc>";
2940 ExtFaults faults[]
2941 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2942 "Incomplete element declaration not faulted", NULL,
2943 XML_ERROR_UNCLOSED_TOKEN},
2944 {/* First two bytes of a three-byte char */
2945 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2946 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2947 {NULL, NULL, NULL, XML_ERROR_NONE}};
2948 ExtFaults *fault;
2949
2950 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2951 set_subtest("%s", fault->parse_text);
2952 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2953 XML_SetExternalEntityRefHandler(g_parser,
2954 external_entity_suspending_faulter);
2955 XML_SetUserData(g_parser, fault);
2956 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2957 "Parser did not report external entity error");
2958 XML_ParserReset(g_parser, NULL);
2959 }
2960 }
2961 END_TEST
2962
2963 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2964 START_TEST(test_explicit_encoding) {
2965 const char *text1 = "<doc>Hello ";
2966 const char *text2 = " World</doc>";
2967
2968 /* Just check that we can set the encoding to NULL before starting */
2969 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2970 fail("Failed to initialise encoding to NULL");
2971 /* Say we are UTF-8 */
2972 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2973 fail("Failed to set explicit encoding");
2974 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2975 == XML_STATUS_ERROR)
2976 xml_failure(g_parser);
2977 /* Try to switch encodings mid-parse */
2978 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2979 fail("Allowed encoding change");
2980 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2981 == XML_STATUS_ERROR)
2982 xml_failure(g_parser);
2983 /* Try now the parse is over */
2984 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2985 fail("Failed to unset encoding");
2986 }
2987 END_TEST
2988
2989 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2990 START_TEST(test_trailing_cr) {
2991 const char *text = "<doc>\r";
2992 int found_cr;
2993
2994 /* Try with a character handler, for code coverage */
2995 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2996 XML_SetUserData(g_parser, &found_cr);
2997 found_cr = 0;
2998 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2999 == XML_STATUS_OK)
3000 fail("Failed to fault unclosed doc");
3001 if (found_cr == 0)
3002 fail("Did not catch the carriage return");
3003 XML_ParserReset(g_parser, NULL);
3004
3005 /* Now with a default handler instead */
3006 XML_SetDefaultHandler(g_parser, cr_cdata_handler);
3007 XML_SetUserData(g_parser, &found_cr);
3008 found_cr = 0;
3009 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3010 == XML_STATUS_OK)
3011 fail("Failed to fault unclosed doc");
3012 if (found_cr == 0)
3013 fail("Did not catch default carriage return");
3014 }
3015 END_TEST
3016
3017 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)3018 START_TEST(test_ext_entity_trailing_cr) {
3019 const char *text = "<!DOCTYPE doc [\n"
3020 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3021 "]>\n"
3022 "<doc>&en;</doc>";
3023 int found_cr;
3024
3025 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3026 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
3027 XML_SetUserData(g_parser, &found_cr);
3028 found_cr = 0;
3029 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3030 != XML_STATUS_OK)
3031 xml_failure(g_parser);
3032 if (found_cr == 0)
3033 fail("No carriage return found");
3034 XML_ParserReset(g_parser, NULL);
3035
3036 /* Try again with a different trailing CR */
3037 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3038 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
3039 XML_SetUserData(g_parser, &found_cr);
3040 found_cr = 0;
3041 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3042 != XML_STATUS_OK)
3043 xml_failure(g_parser);
3044 if (found_cr == 0)
3045 fail("No carriage return found");
3046 }
3047 END_TEST
3048
3049 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)3050 START_TEST(test_trailing_rsqb) {
3051 const char *text8 = "<doc>]";
3052 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
3053 int found_rsqb;
3054 int text8_len = (int)strlen(text8);
3055
3056 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
3057 XML_SetUserData(g_parser, &found_rsqb);
3058 found_rsqb = 0;
3059 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
3060 == XML_STATUS_OK)
3061 fail("Failed to fault unclosed doc");
3062 if (found_rsqb == 0)
3063 fail("Did not catch the right square bracket");
3064
3065 /* Try again with a different encoding */
3066 XML_ParserReset(g_parser, NULL);
3067 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
3068 XML_SetUserData(g_parser, &found_rsqb);
3069 found_rsqb = 0;
3070 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
3071 XML_TRUE)
3072 == XML_STATUS_OK)
3073 fail("Failed to fault unclosed doc");
3074 if (found_rsqb == 0)
3075 fail("Did not catch the right square bracket");
3076
3077 /* And finally with a default handler */
3078 XML_ParserReset(g_parser, NULL);
3079 XML_SetDefaultHandler(g_parser, rsqb_handler);
3080 XML_SetUserData(g_parser, &found_rsqb);
3081 found_rsqb = 0;
3082 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
3083 XML_TRUE)
3084 == XML_STATUS_OK)
3085 fail("Failed to fault unclosed doc");
3086 if (found_rsqb == 0)
3087 fail("Did not catch the right square bracket");
3088 }
3089 END_TEST
3090
3091 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)3092 START_TEST(test_ext_entity_trailing_rsqb) {
3093 const char *text = "<!DOCTYPE doc [\n"
3094 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3095 "]>\n"
3096 "<doc>&en;</doc>";
3097 int found_rsqb;
3098
3099 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3100 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
3101 XML_SetUserData(g_parser, &found_rsqb);
3102 found_rsqb = 0;
3103 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3104 != XML_STATUS_OK)
3105 xml_failure(g_parser);
3106 if (found_rsqb == 0)
3107 fail("No right square bracket found");
3108 }
3109 END_TEST
3110
3111 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)3112 START_TEST(test_ext_entity_good_cdata) {
3113 const char *text = "<!DOCTYPE doc [\n"
3114 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3115 "]>\n"
3116 "<doc>&en;</doc>";
3117
3118 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3119 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
3120 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3121 != XML_STATUS_OK)
3122 xml_failure(g_parser);
3123 }
3124 END_TEST
3125
3126 /* Test user parameter settings */
START_TEST(test_user_parameters)3127 START_TEST(test_user_parameters) {
3128 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
3129 "<!-- Primary parse -->\n"
3130 "<!DOCTYPE doc SYSTEM 'foo'>\n"
3131 "<doc>&entity;";
3132 const char *epilog = "<!-- Back to primary parser -->\n"
3133 "</doc>";
3134
3135 g_comment_count = 0;
3136 g_skip_count = 0;
3137 g_xdecl_count = 0;
3138 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3139 XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
3140 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
3141 XML_SetCommentHandler(g_parser, data_check_comment_handler);
3142 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
3143 XML_UseParserAsHandlerArg(g_parser);
3144 XML_SetUserData(g_parser, (void *)1);
3145 g_handler_data = g_parser;
3146 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3147 == XML_STATUS_ERROR)
3148 xml_failure(g_parser);
3149 /* Ensure we can't change policy mid-parse */
3150 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
3151 fail("Changed param entity parsing policy while parsing");
3152 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
3153 == XML_STATUS_ERROR)
3154 xml_failure(g_parser);
3155 if (g_comment_count != 3)
3156 fail("Comment handler not invoked enough times");
3157 if (g_skip_count != 1)
3158 fail("Skip handler not invoked enough times");
3159 if (g_xdecl_count != 1)
3160 fail("XML declaration handler not invoked");
3161 }
3162 END_TEST
3163
3164 /* Test that an explicit external entity handler argument replaces
3165 * the parser as the first argument.
3166 *
3167 * We do not call the first parameter to the external entity handler
3168 * 'parser' for once, since the first time the handler is called it
3169 * will actually be a text string. We need to be able to access the
3170 * global 'parser' variable to create our external entity parser from,
3171 * since there are code paths we need to ensure get executed.
3172 */
START_TEST(test_ext_entity_ref_parameter)3173 START_TEST(test_ext_entity_ref_parameter) {
3174 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
3175 "<!DOCTYPE doc SYSTEM 'foo'>\n"
3176 "<doc>&entity;</doc>";
3177
3178 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3179 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
3180 /* Set a handler arg that is not NULL and not parser (which is
3181 * what NULL would cause to be passed.
3182 */
3183 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
3184 g_handler_data = text;
3185 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3186 == XML_STATUS_ERROR)
3187 xml_failure(g_parser);
3188
3189 /* Now try again with unset args */
3190 XML_ParserReset(g_parser, NULL);
3191 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3192 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
3193 XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
3194 g_handler_data = g_parser;
3195 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3196 == XML_STATUS_ERROR)
3197 xml_failure(g_parser);
3198 }
3199 END_TEST
3200
3201 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)3202 START_TEST(test_empty_parse) {
3203 const char *text = "<doc></doc>";
3204 const char *partial = "<doc>";
3205
3206 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
3207 fail("Parsing empty string faulted");
3208 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3209 fail("Parsing final empty string not faulted");
3210 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
3211 fail("Parsing final empty string faulted for wrong reason");
3212
3213 /* Now try with valid text before the empty end */
3214 XML_ParserReset(g_parser, NULL);
3215 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3216 == XML_STATUS_ERROR)
3217 xml_failure(g_parser);
3218 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
3219 fail("Parsing final empty string faulted");
3220
3221 /* Now try with invalid text before the empty end */
3222 XML_ParserReset(g_parser, NULL);
3223 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
3224 XML_FALSE)
3225 == XML_STATUS_ERROR)
3226 xml_failure(g_parser);
3227 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3228 fail("Parsing final incomplete empty string not faulted");
3229 }
3230 END_TEST
3231
3232 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)3233 START_TEST(test_negative_len_parse) {
3234 const char *const doc = "<root/>";
3235 for (int isFinal = 0; isFinal < 2; isFinal++) {
3236 set_subtest("isFinal=%d", isFinal);
3237
3238 XML_Parser parser = XML_ParserCreate(NULL);
3239
3240 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
3241 fail("There was not supposed to be any initial parse error.");
3242
3243 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
3244
3245 if (status != XML_STATUS_ERROR)
3246 fail("Negative len was expected to fail the parse but did not.");
3247
3248 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3249 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3250
3251 XML_ParserFree(parser);
3252 }
3253 }
3254 END_TEST
3255
3256 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)3257 START_TEST(test_negative_len_parse_buffer) {
3258 const char *const doc = "<root/>";
3259 for (int isFinal = 0; isFinal < 2; isFinal++) {
3260 set_subtest("isFinal=%d", isFinal);
3261
3262 XML_Parser parser = XML_ParserCreate(NULL);
3263
3264 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
3265 fail("There was not supposed to be any initial parse error.");
3266
3267 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
3268
3269 if (buffer == NULL)
3270 fail("XML_GetBuffer failed.");
3271
3272 memcpy(buffer, doc, strlen(doc));
3273
3274 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
3275
3276 if (status != XML_STATUS_ERROR)
3277 fail("Negative len was expected to fail the parse but did not.");
3278
3279 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3280 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3281
3282 XML_ParserFree(parser);
3283 }
3284 }
3285 END_TEST
3286
3287 /* Test odd corners of the XML_GetBuffer interface */
3288 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)3289 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
3290 const XML_Feature *feature = XML_GetFeatureList();
3291
3292 if (feature == NULL)
3293 return XML_STATUS_ERROR;
3294 for (; feature->feature != XML_FEATURE_END; feature++) {
3295 if (feature->feature == feature_id) {
3296 *presult = feature->value;
3297 return XML_STATUS_OK;
3298 }
3299 }
3300 return XML_STATUS_ERROR;
3301 }
3302
3303 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)3304 START_TEST(test_get_buffer_1) {
3305 const char *text = get_buffer_test_text;
3306 long context_bytes;
3307
3308 /* Attempt to allocate a negative length buffer */
3309 if (XML_GetBuffer(g_parser, -12) != NULL)
3310 fail("Negative length buffer not failed");
3311
3312 /* Now get a small buffer and extend it past valid length */
3313 void *const buffer = XML_GetBuffer(g_parser, 1536);
3314 if (buffer == NULL)
3315 fail("1.5K buffer failed");
3316 assert(buffer != NULL);
3317 memcpy(buffer, text, strlen(text));
3318 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3319 == XML_STATUS_ERROR)
3320 xml_failure(g_parser);
3321 if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
3322 fail("INT_MAX buffer not failed");
3323
3324 /* Now try extending it a more reasonable but still too large
3325 * amount. The allocator in XML_GetBuffer() doubles the buffer
3326 * size until it exceeds the requested amount or INT_MAX. If it
3327 * exceeds INT_MAX, it rejects the request, so we want a request
3328 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
3329 * with an extra byte just to ensure that the request is off any
3330 * boundary. The request will be inflated internally by
3331 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
3332 * request.
3333 */
3334 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
3335 context_bytes = 0;
3336 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
3337 fail("INT_MAX- buffer not failed");
3338
3339 /* Now try extending it a carefully crafted amount */
3340 if (XML_GetBuffer(g_parser, 1000) == NULL)
3341 fail("1000 buffer failed");
3342 }
3343 END_TEST
3344
3345 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)3346 START_TEST(test_get_buffer_2) {
3347 const char *text = get_buffer_test_text;
3348
3349 /* Now get a decent buffer */
3350 void *const buffer = XML_GetBuffer(g_parser, 1536);
3351 if (buffer == NULL)
3352 fail("1.5K buffer failed");
3353 assert(buffer != NULL);
3354 memcpy(buffer, text, strlen(text));
3355 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3356 == XML_STATUS_ERROR)
3357 xml_failure(g_parser);
3358
3359 /* Extend it, to catch a different code path */
3360 if (XML_GetBuffer(g_parser, 1024) == NULL)
3361 fail("1024 buffer failed");
3362 }
3363 END_TEST
3364
3365 /* Test for signed integer overflow CVE-2022-23852 */
3366 #if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow)3367 START_TEST(test_get_buffer_3_overflow) {
3368 XML_Parser parser = XML_ParserCreate(NULL);
3369 assert(parser != NULL);
3370
3371 const char *const text = "\n";
3372 const int expectedKeepValue = (int)strlen(text);
3373
3374 // After this call, variable "keep" in XML_GetBuffer will
3375 // have value expectedKeepValue
3376 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
3377 XML_FALSE /* isFinal */)
3378 == XML_STATUS_ERROR)
3379 xml_failure(parser);
3380
3381 assert(expectedKeepValue > 0);
3382 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
3383 fail("enlarging buffer not failed");
3384
3385 XML_ParserFree(parser);
3386 }
3387 END_TEST
3388 #endif // XML_CONTEXT_BYTES > 0
3389
START_TEST(test_buffer_can_grow_to_max)3390 START_TEST(test_buffer_can_grow_to_max) {
3391 const char *const prefixes[] = {
3392 "",
3393 "<",
3394 "<x a='",
3395 "<doc><x a='",
3396 "<document><x a='",
3397 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
3398 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
3399 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
3400 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
3401 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
3402 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
3403 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
3404 #if defined(__MINGW32__) && ! defined(__MINGW64__)
3405 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
3406 // Can we make a big allocation?
3407 for (int i = 1; i <= 2; i++) {
3408 void *const big = malloc(maxbuf);
3409 if (big != NULL) {
3410 free(big);
3411 break;
3412 }
3413 // The big allocation failed. Let's be a little lenient.
3414 maxbuf = maxbuf / 2;
3415 fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf);
3416 }
3417 #endif
3418
3419 for (int i = 0; i < num_prefixes; ++i) {
3420 set_subtest("\"%s\"", prefixes[i]);
3421 XML_Parser parser = XML_ParserCreate(NULL);
3422 #if XML_GE == 1
3423 assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
3424 == XML_TRUE); // i.e. deactivate
3425 #endif
3426 const int prefix_len = (int)strlen(prefixes[i]);
3427 const enum XML_Status s
3428 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3429 if (s != XML_STATUS_OK)
3430 xml_failure(parser);
3431
3432 // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3433 // subtracting the whole prefix is easiest, and close enough.
3434 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3435 // The limit should be consistent; no prefix should allow us to
3436 // reach above the max buffer size.
3437 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3438 XML_ParserFree(parser);
3439 }
3440 }
3441 END_TEST
3442
START_TEST(test_getbuffer_allocates_on_zero_len)3443 START_TEST(test_getbuffer_allocates_on_zero_len) {
3444 for (int first_len = 1; first_len >= 0; first_len--) {
3445 set_subtest("with len=%d first", first_len);
3446 XML_Parser parser = XML_ParserCreate(NULL);
3447 assert_true(parser != NULL);
3448 assert_true(XML_GetBuffer(parser, first_len) != NULL);
3449 assert_true(XML_GetBuffer(parser, 0) != NULL);
3450 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3451 xml_failure(parser);
3452 XML_ParserFree(parser);
3453 }
3454 }
3455 END_TEST
3456
3457 /* Test position information macros */
START_TEST(test_byte_info_at_end)3458 START_TEST(test_byte_info_at_end) {
3459 const char *text = "<doc></doc>";
3460
3461 if (XML_GetCurrentByteIndex(g_parser) != -1
3462 || XML_GetCurrentByteCount(g_parser) != 0)
3463 fail("Byte index/count incorrect at start of parse");
3464 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3465 == XML_STATUS_ERROR)
3466 xml_failure(g_parser);
3467 /* At end, the count will be zero and the index the end of string */
3468 if (XML_GetCurrentByteCount(g_parser) != 0)
3469 fail("Terminal byte count incorrect");
3470 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3471 fail("Terminal byte index incorrect");
3472 }
3473 END_TEST
3474
3475 /* Test position information from errors */
3476 #define PRE_ERROR_STR "<doc></"
3477 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3478 START_TEST(test_byte_info_at_error) {
3479 const char *text = PRE_ERROR_STR POST_ERROR_STR;
3480
3481 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3482 == XML_STATUS_OK)
3483 fail("Syntax error not faulted");
3484 if (XML_GetCurrentByteCount(g_parser) != 0)
3485 fail("Error byte count incorrect");
3486 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3487 fail("Error byte index incorrect");
3488 }
3489 END_TEST
3490 #undef PRE_ERROR_STR
3491 #undef POST_ERROR_STR
3492
3493 /* Test position information in handler */
3494 #define START_ELEMENT "<e>"
3495 #define CDATA_TEXT "Hello"
3496 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3497 START_TEST(test_byte_info_at_cdata) {
3498 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3499 int offset, size;
3500 ByteTestData data;
3501
3502 /* Check initial context is empty */
3503 if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3504 fail("Unexpected context at start of parse");
3505
3506 data.start_element_len = (int)strlen(START_ELEMENT);
3507 data.cdata_len = (int)strlen(CDATA_TEXT);
3508 data.total_string_len = (int)strlen(text);
3509 XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3510 XML_SetUserData(g_parser, &data);
3511 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3512 xml_failure(g_parser);
3513 }
3514 END_TEST
3515 #undef START_ELEMENT
3516 #undef CDATA_TEXT
3517 #undef END_ELEMENT
3518
3519 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3520 START_TEST(test_predefined_entities) {
3521 const char *text = "<doc><>&"'</doc>";
3522 const XML_Char *expected = XCS("<doc><>&"'</doc>");
3523 const XML_Char *result = XCS("<>&\"'");
3524 CharData storage;
3525
3526 XML_SetDefaultHandler(g_parser, accumulate_characters);
3527 /* run_character_check uses XML_SetCharacterDataHandler(), which
3528 * unfortunately heads off a code path that we need to exercise.
3529 */
3530 CharData_Init(&storage);
3531 XML_SetUserData(g_parser, &storage);
3532 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3533 == XML_STATUS_ERROR)
3534 xml_failure(g_parser);
3535 /* The default handler doesn't translate the entities */
3536 CharData_CheckXMLChars(&storage, expected);
3537
3538 /* Now try again and check the translation */
3539 XML_ParserReset(g_parser, NULL);
3540 run_character_check(text, result);
3541 }
3542 END_TEST
3543
3544 /* Regression test that an invalid tag in an external parameter
3545 * reference in an external DTD is correctly faulted.
3546 *
3547 * Only a few specific tags are legal in DTDs ignoring comments and
3548 * processing instructions, all of which begin with an exclamation
3549 * mark. "<el/>" is not one of them, so the parser should raise an
3550 * error on encountering it.
3551 */
START_TEST(test_invalid_tag_in_dtd)3552 START_TEST(test_invalid_tag_in_dtd) {
3553 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3554 "<doc></doc>\n";
3555
3556 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3557 XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3558 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3559 "Invalid tag IN DTD external param not rejected");
3560 }
3561 END_TEST
3562
3563 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3564 START_TEST(test_not_predefined_entities) {
3565 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3566 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3567 int i = 0;
3568
3569 while (text[i] != NULL) {
3570 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3571 "Undefined entity not rejected");
3572 XML_ParserReset(g_parser, NULL);
3573 i++;
3574 }
3575 }
3576 END_TEST
3577
3578 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3579 START_TEST(test_ignore_section) {
3580 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3581 "<doc><e>&entity;</e></doc>";
3582 const XML_Char *expected
3583 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3584 CharData storage;
3585
3586 CharData_Init(&storage);
3587 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3588 XML_SetUserData(g_parser, &storage);
3589 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3590 XML_SetDefaultHandler(g_parser, accumulate_characters);
3591 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3592 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3593 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3594 XML_SetStartElementHandler(g_parser, dummy_start_element);
3595 XML_SetEndElementHandler(g_parser, dummy_end_element);
3596 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3597 == XML_STATUS_ERROR)
3598 xml_failure(g_parser);
3599 CharData_CheckXMLChars(&storage, expected);
3600 }
3601 END_TEST
3602
START_TEST(test_ignore_section_utf16)3603 START_TEST(test_ignore_section_utf16) {
3604 const char text[] =
3605 /* <!DOCTYPE d SYSTEM 's'> */
3606 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3607 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3608 /* <d><e>&en;</e></d> */
3609 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3610 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3611 CharData storage;
3612
3613 CharData_Init(&storage);
3614 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3615 XML_SetUserData(g_parser, &storage);
3616 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3617 XML_SetDefaultHandler(g_parser, accumulate_characters);
3618 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3619 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3620 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3621 XML_SetStartElementHandler(g_parser, dummy_start_element);
3622 XML_SetEndElementHandler(g_parser, dummy_end_element);
3623 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3624 == XML_STATUS_ERROR)
3625 xml_failure(g_parser);
3626 CharData_CheckXMLChars(&storage, expected);
3627 }
3628 END_TEST
3629
START_TEST(test_ignore_section_utf16_be)3630 START_TEST(test_ignore_section_utf16_be) {
3631 const char text[] =
3632 /* <!DOCTYPE d SYSTEM 's'> */
3633 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3634 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3635 /* <d><e>&en;</e></d> */
3636 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3637 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3638 CharData storage;
3639
3640 CharData_Init(&storage);
3641 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3642 XML_SetUserData(g_parser, &storage);
3643 XML_SetExternalEntityRefHandler(g_parser,
3644 external_entity_load_ignore_utf16_be);
3645 XML_SetDefaultHandler(g_parser, accumulate_characters);
3646 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3647 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3648 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3649 XML_SetStartElementHandler(g_parser, dummy_start_element);
3650 XML_SetEndElementHandler(g_parser, dummy_end_element);
3651 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3652 == XML_STATUS_ERROR)
3653 xml_failure(g_parser);
3654 CharData_CheckXMLChars(&storage, expected);
3655 }
3656 END_TEST
3657
3658 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3659 START_TEST(test_bad_ignore_section) {
3660 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3661 "<doc><e>&entity;</e></doc>";
3662 ExtFaults faults[]
3663 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3664 XML_ERROR_SYNTAX},
3665 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3666 XML_ERROR_INVALID_TOKEN},
3667 {/* FIrst two bytes of a three-byte char */
3668 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3669 XML_ERROR_PARTIAL_CHAR},
3670 {NULL, NULL, NULL, XML_ERROR_NONE}};
3671 ExtFaults *fault;
3672
3673 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3674 set_subtest("%s", fault->parse_text);
3675 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3676 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3677 XML_SetUserData(g_parser, fault);
3678 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3679 "Incomplete IGNORE section not failed");
3680 XML_ParserReset(g_parser, NULL);
3681 }
3682 }
3683 END_TEST
3684
/* Per-subtest state shared between test_external_bom_consumed and its
 * external entity handler, external_bom_checker, via the parser's user
 * data.
 */
struct bom_testdata {
  /* Text the handler parses for the entity '004-2.ent' */
  const char *external;
  /* Number of leading bytes of 'external' given to the first, non-final
   * parse call; the remainder goes into a second, final call.
   */
  int split;
  /* Set to XML_TRUE by the handler when it is invoked for '004-2.ent' */
  XML_Bool nested_callback_happened;
};
3690
3691 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3692 external_bom_checker(XML_Parser parser, const XML_Char *context,
3693 const XML_Char *base, const XML_Char *systemId,
3694 const XML_Char *publicId) {
3695 const char *text;
3696 UNUSED_P(base);
3697 UNUSED_P(systemId);
3698 UNUSED_P(publicId);
3699
3700 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3701 if (ext_parser == NULL)
3702 fail("Could not create external entity parser");
3703
3704 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3705 struct bom_testdata *const testdata = XML_GetUserData(parser);
3706 const char *const external = testdata->external;
3707 const int split = testdata->split;
3708 testdata->nested_callback_happened = XML_TRUE;
3709
3710 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3711 != XML_STATUS_OK) {
3712 xml_failure(ext_parser);
3713 }
3714 text = external + split; // the parse below will continue where we left off.
3715 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3716 text = "<!ELEMENT doc EMPTY>\n"
3717 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3718 "<!ENTITY % e2 '%e1;'>\n";
3719 } else {
3720 fail("unknown systemId");
3721 }
3722
3723 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3724 != XML_STATUS_OK)
3725 xml_failure(ext_parser);
3726
3727 XML_ParserFree(ext_parser);
3728 return XML_STATUS_OK;
3729 }
3730
3731 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3732 START_TEST(test_external_bom_consumed) {
3733 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3734 "<doc></doc>\n";
3735 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3736 const int len = (int)strlen(external);
3737 for (int split = 0; split <= len; ++split) {
3738 set_subtest("split at byte %d", split);
3739
3740 struct bom_testdata testdata;
3741 testdata.external = external;
3742 testdata.split = split;
3743 testdata.nested_callback_happened = XML_FALSE;
3744
3745 XML_Parser parser = XML_ParserCreate(NULL);
3746 if (parser == NULL) {
3747 fail("Couldn't create parser");
3748 }
3749 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3750 XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3751 XML_SetUserData(parser, &testdata);
3752 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3753 == XML_STATUS_ERROR)
3754 xml_failure(parser);
3755 if (! testdata.nested_callback_happened) {
3756 fail("ref handler not called");
3757 }
3758 XML_ParserFree(parser);
3759 }
3760 }
3761 END_TEST
3762
3763 /* Test recursive parsing */
START_TEST(test_external_entity_values)3764 START_TEST(test_external_entity_values) {
3765 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3766 "<doc></doc>\n";
3767 ExtFaults data_004_2[] = {
3768 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3769 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3770 XML_ERROR_INVALID_TOKEN},
3771 {"'wombat", "Unterminated string not faulted", NULL,
3772 XML_ERROR_UNCLOSED_TOKEN},
3773 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3774 XML_ERROR_PARTIAL_CHAR},
3775 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3776 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3777 XML_ERROR_XML_DECL},
3778 {/* UTF-8 BOM */
3779 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3780 XML_ERROR_NONE},
3781 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3782 "Invalid token after text declaration not faulted", NULL,
3783 XML_ERROR_INVALID_TOKEN},
3784 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3785 "Unterminated string after text decl not faulted", NULL,
3786 XML_ERROR_UNCLOSED_TOKEN},
3787 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3788 "Partial UTF-8 character after text decl not faulted", NULL,
3789 XML_ERROR_PARTIAL_CHAR},
3790 {"%e1;", "Recursive parameter entity not faulted", NULL,
3791 XML_ERROR_RECURSIVE_ENTITY_REF},
3792 {NULL, NULL, NULL, XML_ERROR_NONE}};
3793 int i;
3794
3795 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3796 set_subtest("%s", data_004_2[i].parse_text);
3797 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3798 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3799 XML_SetUserData(g_parser, &data_004_2[i]);
3800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3801 == XML_STATUS_ERROR)
3802 xml_failure(g_parser);
3803 XML_ParserReset(g_parser, NULL);
3804 }
3805 }
3806 END_TEST
3807
3808 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3809 START_TEST(test_ext_entity_not_standalone) {
3810 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3811 "<doc></doc>";
3812
3813 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3814 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3815 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3816 "Standalone rejection not caught");
3817 }
3818 END_TEST
3819
START_TEST(test_ext_entity_value_abort)3820 START_TEST(test_ext_entity_value_abort) {
3821 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3822 "<doc></doc>\n";
3823
3824 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3825 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3826 g_resumable = XML_FALSE;
3827 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3828 == XML_STATUS_ERROR)
3829 xml_failure(g_parser);
3830 }
3831 END_TEST
3832
START_TEST(test_bad_public_doctype)3833 START_TEST(test_bad_public_doctype) {
3834 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3835 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3836 "<doc></doc>";
3837
3838 /* Setting a handler provokes a particular code path */
3839 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3840 dummy_end_doctype_handler);
3841 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3842 }
3843 END_TEST
3844
3845 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3846 START_TEST(test_attribute_enum_value) {
3847 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3848 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3849 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3850 ExtTest dtd_data
3851 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3852 "<!ELEMENT a EMPTY>\n"
3853 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3854 NULL, NULL};
3855 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3856
3857 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3858 XML_SetUserData(g_parser, &dtd_data);
3859 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3860 /* An attribute list handler provokes a different code path */
3861 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3862 run_ext_character_check(text, &dtd_data, expected);
3863 }
3864 END_TEST
3865
3866 /* Slightly bizarrely, the library seems to silently ignore entity
3867 * definitions for predefined entities, even when they are wrong. The
3868 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3869 * to happen, so this is currently treated as acceptable.
3870 */
START_TEST(test_predefined_entity_redefinition)3871 START_TEST(test_predefined_entity_redefinition) {
3872 const char *text = "<!DOCTYPE doc [\n"
3873 "<!ENTITY apos 'foo'>\n"
3874 "]>\n"
3875 "<doc>'</doc>";
3876 run_character_check(text, XCS("'"));
3877 }
3878 END_TEST
3879
3880 /* Test that the parser stops processing the DTD after an unresolved
3881 * parameter entity is encountered.
3882 */
START_TEST(test_dtd_stop_processing)3883 START_TEST(test_dtd_stop_processing) {
3884 const char *text = "<!DOCTYPE doc [\n"
3885 "%foo;\n"
3886 "<!ENTITY bar 'bas'>\n"
3887 "]><doc/>";
3888
3889 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3890 init_dummy_handlers();
3891 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3892 == XML_STATUS_ERROR)
3893 xml_failure(g_parser);
3894 if (get_dummy_handler_flags() != 0)
3895 fail("DTD processing still going after undefined PE");
3896 }
3897 END_TEST
3898
3899 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3900 START_TEST(test_public_notation_no_sysid) {
3901 const char *text = "<!DOCTYPE doc [\n"
3902 "<!NOTATION note PUBLIC 'foo'>\n"
3903 "<!ELEMENT doc EMPTY>\n"
3904 "]>\n<doc/>";
3905
3906 init_dummy_handlers();
3907 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3908 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3909 == XML_STATUS_ERROR)
3910 xml_failure(g_parser);
3911 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3912 fail("Notation declaration handler not called");
3913 }
3914 END_TEST
3915
START_TEST(test_nested_groups)3916 START_TEST(test_nested_groups) {
3917 const char *text
3918 = "<!DOCTYPE doc [\n"
3919 "<!ELEMENT doc "
3920 /* Sixteen elements per line */
3921 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3922 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3923 "))))))))))))))))))))))))))))))))>\n"
3924 "<!ELEMENT e EMPTY>"
3925 "]>\n"
3926 "<doc><e/></doc>";
3927 CharData storage;
3928
3929 CharData_Init(&storage);
3930 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3931 XML_SetStartElementHandler(g_parser, record_element_start_handler);
3932 XML_SetUserData(g_parser, &storage);
3933 init_dummy_handlers();
3934 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3935 == XML_STATUS_ERROR)
3936 xml_failure(g_parser);
3937 CharData_CheckXMLChars(&storage, XCS("doce"));
3938 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3939 fail("Element handler not fired");
3940 }
3941 END_TEST
3942
START_TEST(test_group_choice)3943 START_TEST(test_group_choice) {
3944 const char *text = "<!DOCTYPE doc [\n"
3945 "<!ELEMENT doc (a|b|c)+>\n"
3946 "<!ELEMENT a EMPTY>\n"
3947 "<!ELEMENT b (#PCDATA)>\n"
3948 "<!ELEMENT c ANY>\n"
3949 "]>\n"
3950 "<doc>\n"
3951 "<a/>\n"
3952 "<b attr='foo'>This is a foo</b>\n"
3953 "<c></c>\n"
3954 "</doc>\n";
3955
3956 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3957 init_dummy_handlers();
3958 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3959 == XML_STATUS_ERROR)
3960 xml_failure(g_parser);
3961 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3962 fail("Element handler flag not raised");
3963 }
3964 END_TEST
3965
START_TEST(test_standalone_parameter_entity)3966 START_TEST(test_standalone_parameter_entity) {
3967 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3968 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3969 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3970 "%entity;\n"
3971 "]>\n"
3972 "<doc></doc>";
3973 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3974
3975 XML_SetUserData(g_parser, dtd_data);
3976 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3977 XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3978 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3979 == XML_STATUS_ERROR)
3980 xml_failure(g_parser);
3981 }
3982 END_TEST
3983
3984 /* Test skipping of parameter entity in an external DTD */
3985 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3986 START_TEST(test_skipped_parameter_entity) {
3987 const char *text = "<?xml version='1.0'?>\n"
3988 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3989 "<!ELEMENT root (#PCDATA|a)* >\n"
3990 "]>\n"
3991 "<root></root>";
3992 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3993
3994 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3995 XML_SetUserData(g_parser, &dtd_data);
3996 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3997 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3998 init_dummy_handlers();
3999 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4000 == XML_STATUS_ERROR)
4001 xml_failure(g_parser);
4002 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
4003 fail("Skip handler not executed");
4004 }
4005 END_TEST
4006
4007 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)4008 START_TEST(test_recursive_external_parameter_entity) {
4009 const char *text = "<?xml version='1.0'?>\n"
4010 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
4011 "<!ELEMENT root (#PCDATA|a)* >\n"
4012 "]>\n"
4013 "<root></root>";
4014 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;",
4015 "Recursive external parameter entity not faulted", NULL,
4016 XML_ERROR_RECURSIVE_ENTITY_REF};
4017
4018 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4019 XML_SetUserData(g_parser, &dtd_data);
4020 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4021 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4022 "Recursive external parameter not spotted");
4023 }
4024 END_TEST
4025
4026 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)4027 START_TEST(test_undefined_ext_entity_in_external_dtd) {
4028 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
4029 "<doc></doc>\n";
4030
4031 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4032 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
4033 XML_SetUserData(g_parser, NULL);
4034 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4035 == XML_STATUS_ERROR)
4036 xml_failure(g_parser);
4037
4038 /* Now repeat without the external entity ref handler invoking
4039 * another copy of itself.
4040 */
4041 XML_ParserReset(g_parser, NULL);
4042 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4043 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
4044 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
4045 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4046 == XML_STATUS_ERROR)
4047 xml_failure(g_parser);
4048 }
4049 END_TEST
4050
4051 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)4052 START_TEST(test_suspend_xdecl) {
4053 const char *text = long_character_data_text;
4054
4055 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
4056 XML_SetUserData(g_parser, g_parser);
4057 g_resumable = XML_TRUE;
4058 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4059 // we won't know exactly how much input we actually managed to give Expat.
4060 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4061 != XML_STATUS_SUSPENDED)
4062 xml_failure(g_parser);
4063 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
4064 xml_failure(g_parser);
4065 /* Attempt to start a new parse while suspended */
4066 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4067 != XML_STATUS_ERROR)
4068 fail("Attempt to parse while suspended not faulted");
4069 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
4070 fail("Suspended parse not faulted with correct error");
4071 }
4072 END_TEST
4073
4074 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)4075 START_TEST(test_abort_epilog) {
4076 const char *text = "<doc></doc>\n\r\n";
4077 XML_Char trigger_char = XCS('\r');
4078
4079 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4080 XML_SetUserData(g_parser, &trigger_char);
4081 g_resumable = XML_FALSE;
4082 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4083 != XML_STATUS_ERROR)
4084 fail("Abort not triggered");
4085 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
4086 xml_failure(g_parser);
4087 }
4088 END_TEST
4089
4090 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)4091 START_TEST(test_abort_epilog_2) {
4092 const char *text = "<doc></doc>\n";
4093 XML_Char trigger_char = XCS('\n');
4094
4095 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4096 XML_SetUserData(g_parser, &trigger_char);
4097 g_resumable = XML_FALSE;
4098 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
4099 }
4100 END_TEST
4101
4102 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)4103 START_TEST(test_suspend_epilog) {
4104 const char *text = "<doc></doc>\n";
4105 XML_Char trigger_char = XCS('\n');
4106
4107 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4108 XML_SetUserData(g_parser, &trigger_char);
4109 g_resumable = XML_TRUE;
4110 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4111 != XML_STATUS_SUSPENDED)
4112 xml_failure(g_parser);
4113 }
4114 END_TEST
4115
START_TEST(test_suspend_in_sole_empty_tag)4116 START_TEST(test_suspend_in_sole_empty_tag) {
4117 const char *text = "<doc/>";
4118 enum XML_Status rc;
4119
4120 XML_SetEndElementHandler(g_parser, suspending_end_handler);
4121 XML_SetUserData(g_parser, g_parser);
4122 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
4123 if (rc == XML_STATUS_ERROR)
4124 xml_failure(g_parser);
4125 else if (rc != XML_STATUS_SUSPENDED)
4126 fail("Suspend not triggered");
4127 rc = XML_ResumeParser(g_parser);
4128 if (rc == XML_STATUS_ERROR)
4129 xml_failure(g_parser);
4130 else if (rc != XML_STATUS_OK)
4131 fail("Resume failed");
4132 }
4133 END_TEST
4134
START_TEST(test_unfinished_epilog)4135 START_TEST(test_unfinished_epilog) {
4136 const char *text = "<doc></doc><";
4137
4138 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
4139 "Incomplete epilog entry not faulted");
4140 }
4141 END_TEST
4142
START_TEST(test_partial_char_in_epilog)4143 START_TEST(test_partial_char_in_epilog) {
4144 const char *text = "<doc></doc>\xe2\x82";
4145
4146 /* First check that no fault is raised if the parse is not finished */
4147 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
4148 == XML_STATUS_ERROR)
4149 xml_failure(g_parser);
4150 /* Now check that it is faulted once we finish */
4151 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
4152 fail("Partial character in epilog not faulted");
4153 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
4154 xml_failure(g_parser);
4155 }
4156 END_TEST
4157
4158 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)4159 START_TEST(test_suspend_resume_internal_entity) {
4160 const char *text
4161 = "<!DOCTYPE doc [\n"
4162 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
4163 "]>\n"
4164 "<doc>&foo;</doc>\n";
4165 const XML_Char *expected1 = XCS("Hi");
4166 const XML_Char *expected2 = XCS("HiHo");
4167 CharData storage;
4168
4169 CharData_Init(&storage);
4170 XML_SetStartElementHandler(g_parser, start_element_suspender);
4171 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4172 XML_SetUserData(g_parser, &storage);
4173 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4174 // we won't know exactly how much input we actually managed to give Expat.
4175 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4176 != XML_STATUS_SUSPENDED)
4177 xml_failure(g_parser);
4178 CharData_CheckXMLChars(&storage, XCS(""));
4179 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
4180 xml_failure(g_parser);
4181 CharData_CheckXMLChars(&storage, expected1);
4182 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4183 xml_failure(g_parser);
4184 CharData_CheckXMLChars(&storage, expected2);
4185 }
4186 END_TEST
4187
START_TEST(test_suspend_resume_internal_entity_issue_629)4188 START_TEST(test_suspend_resume_internal_entity_issue_629) {
4189 const char *const text
4190 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
4191 "<"
4192 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4193 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4194 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4195 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4196 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4197 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4198 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4199 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4200 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4201 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4202 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4203 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4204 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4205 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4206 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4207 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4208 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4209 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4210 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4211 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4212 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4213 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4214 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4215 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4216 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4217 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4218 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4219 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4220 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4221 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4222 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4223 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4224 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4225 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4226 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4227 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4228 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4229 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4230 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4231 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4232 "/>"
4233 "</b></a>";
4234 const size_t firstChunkSizeBytes = 54;
4235
4236 XML_Parser parser = XML_ParserCreate(NULL);
4237 XML_SetUserData(parser, parser);
4238 XML_SetCommentHandler(parser, suspending_comment_handler);
4239
4240 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
4241 != XML_STATUS_SUSPENDED)
4242 xml_failure(parser);
4243 if (XML_ResumeParser(parser) != XML_STATUS_OK)
4244 xml_failure(parser);
4245 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
4246 (int)(strlen(text) - firstChunkSizeBytes),
4247 XML_TRUE)
4248 != XML_STATUS_OK)
4249 xml_failure(parser);
4250 XML_ParserFree(parser);
4251 }
4252 END_TEST
4253
4254 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)4255 START_TEST(test_resume_entity_with_syntax_error) {
4256 if (g_chunkSize != 0) {
4257 // this test does not use SINGLE_BYTES, because of suspension
4258 return;
4259 }
4260
4261 const char *text = "<!DOCTYPE doc [\n"
4262 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
4263 "]>\n"
4264 "<doc>&foo;</doc>\n";
4265
4266 XML_SetStartElementHandler(g_parser, start_element_suspender);
4267 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4268 // we won't know exactly how much input we actually managed to give Expat.
4269 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4270 != XML_STATUS_SUSPENDED)
4271 xml_failure(g_parser);
4272 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
4273 fail("Syntax error in entity not faulted");
4274 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
4275 xml_failure(g_parser);
4276 }
4277 END_TEST
4278
4279 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)4280 START_TEST(test_suspend_resume_parameter_entity) {
4281 const char *text = "<!DOCTYPE doc [\n"
4282 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
4283 "%foo;\n"
4284 "]>\n"
4285 "<doc>Hello, world</doc>";
4286 const XML_Char *expected = XCS("Hello, world");
4287 CharData storage;
4288
4289 CharData_Init(&storage);
4290 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4291 XML_SetElementDeclHandler(g_parser, element_decl_suspender);
4292 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4293 XML_SetUserData(g_parser, &storage);
4294 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4295 != XML_STATUS_SUSPENDED)
4296 xml_failure(g_parser);
4297 CharData_CheckXMLChars(&storage, XCS(""));
4298 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4299 xml_failure(g_parser);
4300 CharData_CheckXMLChars(&storage, expected);
4301 }
4302 END_TEST
4303
4304 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)4305 START_TEST(test_restart_on_error) {
4306 const char *text = "<$doc><doc></doc>";
4307
4308 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4309 != XML_STATUS_ERROR)
4310 fail("Invalid tag name not faulted");
4311 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4312 xml_failure(g_parser);
4313 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
4314 fail("Restarting invalid parse not faulted");
4315 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4316 xml_failure(g_parser);
4317 }
4318 END_TEST
4319
4320 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)4321 START_TEST(test_reject_lt_in_attribute_value) {
4322 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
4323 "<doc></doc>";
4324
4325 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4326 "Bad attribute default not faulted");
4327 }
4328 END_TEST
4329
START_TEST(test_reject_unfinished_param_in_att_value)4330 START_TEST(test_reject_unfinished_param_in_att_value) {
4331 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
4332 "<doc></doc>";
4333
4334 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4335 "Bad attribute default not faulted");
4336 }
4337 END_TEST
4338
START_TEST(test_trailing_cr_in_att_value)4339 START_TEST(test_trailing_cr_in_att_value) {
4340 const char *text = "<doc a='value\r'/>";
4341
4342 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4343 == XML_STATUS_ERROR)
4344 xml_failure(g_parser);
4345 }
4346 END_TEST
4347
4348 /* Try parsing a general entity within a parameter entity in a
4349 * standalone internal DTD. Covers a corner case in the parser.
4350 */
START_TEST(test_standalone_internal_entity)4351 START_TEST(test_standalone_internal_entity) {
4352 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
4353 "<!DOCTYPE doc [\n"
4354 " <!ELEMENT doc (#PCDATA)>\n"
4355 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n"
4356 " <!ENTITY ge 'AttDefaultValue'>\n"
4357 " %pe;\n"
4358 "]>\n"
4359 "<doc att2='any'/>";
4360
4361 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4362 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4363 == XML_STATUS_ERROR)
4364 xml_failure(g_parser);
4365 }
4366 END_TEST
4367
4368 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)4369 START_TEST(test_skipped_external_entity) {
4370 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4371 "<doc></doc>\n";
4372 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
4373 "<!ENTITY % e2 '%e1;'>\n",
4374 NULL, NULL};
4375
4376 XML_SetUserData(g_parser, &test_data);
4377 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4378 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4379 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4380 == XML_STATUS_ERROR)
4381 xml_failure(g_parser);
4382 }
4383 END_TEST
4384
START_TEST(test_scaff_index_shared_across_external_entity_parser)4385 START_TEST(test_scaff_index_shared_across_external_entity_parser) {
4386 const char text[]
4387 = "<!DOCTYPE doc [\n"
4388 "<!ELEMENT a "
4389 "((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((b))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))>\n"
4390 "<!ENTITY % e SYSTEM 'ext'>\n"
4391 "%e;\n"
4392 "<!ELEMENT c "
4393 "(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((d)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))>\n"
4394 "]>\n"
4395 "<doc/>";
4396 ExtOption options[]
4397 = {{XCS("ext"),
4398 "<!ELEMENT x "
4399 "((((((((((((((((((((((((((((((((y))))))))))))))))))))))))))))))))>"},
4400 {NULL, NULL}};
4401
4402 XML_Parser parser = XML_ParserCreate(NULL);
4403 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4404 XML_SetUserData(parser, options);
4405 XML_SetExternalEntityRefHandler(parser, external_entity_optioner);
4406 XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
4407
4408 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
4409 == XML_STATUS_ERROR)
4410 xml_failure(parser);
4411
4412 XML_ParserFree(parser);
4413 }
4414 END_TEST
4415
4416 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)4417 START_TEST(test_skipped_null_loaded_ext_entity) {
4418 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4419 "<doc />";
4420 ExtHdlrData test_data
4421 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4422 "<!ENTITY % pe2 '%pe1;'>\n"
4423 "%pe2;\n",
4424 external_entity_null_loader, NULL};
4425
4426 XML_SetUserData(g_parser, &test_data);
4427 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4428 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4429 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4430 == XML_STATUS_ERROR)
4431 xml_failure(g_parser);
4432 }
4433 END_TEST
4434
START_TEST(test_skipped_unloaded_ext_entity)4435 START_TEST(test_skipped_unloaded_ext_entity) {
4436 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4437 "<doc />";
4438 ExtHdlrData test_data
4439 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4440 "<!ENTITY % pe2 '%pe1;'>\n"
4441 "%pe2;\n",
4442 NULL, NULL};
4443
4444 XML_SetUserData(g_parser, &test_data);
4445 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4446 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4447 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4448 == XML_STATUS_ERROR)
4449 xml_failure(g_parser);
4450 }
4451 END_TEST
4452
4453 /* Test that a parameter entity value ending with a carriage return
4454 * has it translated internally into a newline.
4455 */
START_TEST(test_param_entity_with_trailing_cr)4456 START_TEST(test_param_entity_with_trailing_cr) {
4457 #define PARAM_ENTITY_NAME "pe"
4458 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
4459 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4460 "<doc/>";
4461 ExtTest test_data
4462 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4463 "%" PARAM_ENTITY_NAME ";\n",
4464 NULL, NULL};
4465
4466 XML_SetUserData(g_parser, &test_data);
4467 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4468 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4469 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4470 param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4471 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4472 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4473 == XML_STATUS_ERROR)
4474 xml_failure(g_parser);
4475 int entity_match_flag = get_param_entity_match_flag();
4476 if (entity_match_flag == ENTITY_MATCH_FAIL)
4477 fail("Parameter entity CR->NEWLINE conversion failed");
4478 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4479 fail("Parameter entity not parsed");
4480 }
4481 #undef PARAM_ENTITY_NAME
4482 #undef PARAM_ENTITY_CORE_VALUE
4483 END_TEST
4484
START_TEST(test_invalid_character_entity)4485 START_TEST(test_invalid_character_entity) {
4486 const char *text = "<!DOCTYPE doc [\n"
4487 " <!ENTITY entity '�'>\n"
4488 "]>\n"
4489 "<doc>&entity;</doc>";
4490
4491 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4492 "Out of range character reference not faulted");
4493 }
4494 END_TEST
4495
START_TEST(test_invalid_character_entity_2)4496 START_TEST(test_invalid_character_entity_2) {
4497 const char *text = "<!DOCTYPE doc [\n"
4498 " <!ENTITY entity '&#xg0;'>\n"
4499 "]>\n"
4500 "<doc>&entity;</doc>";
4501
4502 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4503 "Out of range character reference not faulted");
4504 }
4505 END_TEST
4506
START_TEST(test_invalid_character_entity_3)4507 START_TEST(test_invalid_character_entity_3) {
4508 const char text[] =
4509 /* <!DOCTYPE doc [\n */
4510 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4511 /* U+0E04 = KHO KHWAI
4512 * U+0E08 = CHO CHAN */
4513 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4514 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4515 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4516 /* ]>\n */
4517 "\0]\0>\0\n"
4518 /* <doc>&entity;</doc> */
4519 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4520
4521 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4522 != XML_STATUS_ERROR)
4523 fail("Invalid start of entity name not faulted");
4524 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4525 xml_failure(g_parser);
4526 }
4527 END_TEST
4528
START_TEST(test_invalid_character_entity_4)4529 START_TEST(test_invalid_character_entity_4) {
4530 const char *text = "<!DOCTYPE doc [\n"
4531 " <!ENTITY entity '�'>\n" /* = � */
4532 "]>\n"
4533 "<doc>&entity;</doc>";
4534
4535 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4536 "Out of range character reference not faulted");
4537 }
4538 END_TEST
4539
4540 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4541 START_TEST(test_pi_handled_in_default) {
4542 const char *text = "<?test processing instruction?>\n<doc/>";
4543 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4544 CharData storage;
4545
4546 CharData_Init(&storage);
4547 XML_SetDefaultHandler(g_parser, accumulate_characters);
4548 XML_SetUserData(g_parser, &storage);
4549 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4550 == XML_STATUS_ERROR)
4551 xml_failure(g_parser);
4552 CharData_CheckXMLChars(&storage, expected);
4553 }
4554 END_TEST
4555
4556 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4557 START_TEST(test_comment_handled_in_default) {
4558 const char *text = "<!-- This is a comment -->\n<doc/>";
4559 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4560 CharData storage;
4561
4562 CharData_Init(&storage);
4563 XML_SetDefaultHandler(g_parser, accumulate_characters);
4564 XML_SetUserData(g_parser, &storage);
4565 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4566 == XML_STATUS_ERROR)
4567 xml_failure(g_parser);
4568 CharData_CheckXMLChars(&storage, expected);
4569 }
4570 END_TEST
4571
4572 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4573 START_TEST(test_pi_yml) {
4574 const char *text = "<?yml something like data?><doc/>";
4575 const XML_Char *expected = XCS("yml: something like data\n");
4576 CharData storage;
4577
4578 CharData_Init(&storage);
4579 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4580 XML_SetUserData(g_parser, &storage);
4581 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4582 == XML_STATUS_ERROR)
4583 xml_failure(g_parser);
4584 CharData_CheckXMLChars(&storage, expected);
4585 }
4586 END_TEST
4587
START_TEST(test_pi_xnl)4588 START_TEST(test_pi_xnl) {
4589 const char *text = "<?xnl nothing like data?><doc/>";
4590 const XML_Char *expected = XCS("xnl: nothing like data\n");
4591 CharData storage;
4592
4593 CharData_Init(&storage);
4594 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4595 XML_SetUserData(g_parser, &storage);
4596 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4597 == XML_STATUS_ERROR)
4598 xml_failure(g_parser);
4599 CharData_CheckXMLChars(&storage, expected);
4600 }
4601 END_TEST
4602
START_TEST(test_pi_xmm)4603 START_TEST(test_pi_xmm) {
4604 const char *text = "<?xmm everything like data?><doc/>";
4605 const XML_Char *expected = XCS("xmm: everything like data\n");
4606 CharData storage;
4607
4608 CharData_Init(&storage);
4609 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4610 XML_SetUserData(g_parser, &storage);
4611 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4612 == XML_STATUS_ERROR)
4613 xml_failure(g_parser);
4614 CharData_CheckXMLChars(&storage, expected);
4615 }
4616 END_TEST
4617
START_TEST(test_utf16_pi)4618 START_TEST(test_utf16_pi) {
4619 const char text[] =
4620 /* <?{KHO KHWAI}{CHO CHAN}?>
4621 * where {KHO KHWAI} = U+0E04
4622 * and {CHO CHAN} = U+0E08
4623 */
4624 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4625 /* <q/> */
4626 "<\0q\0/\0>\0";
4627 #ifdef XML_UNICODE
4628 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4629 #else
4630 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4631 #endif
4632 CharData storage;
4633
4634 CharData_Init(&storage);
4635 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4636 XML_SetUserData(g_parser, &storage);
4637 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4638 == XML_STATUS_ERROR)
4639 xml_failure(g_parser);
4640 CharData_CheckXMLChars(&storage, expected);
4641 }
4642 END_TEST
4643
START_TEST(test_utf16_be_pi)4644 START_TEST(test_utf16_be_pi) {
4645 const char text[] =
4646 /* <?{KHO KHWAI}{CHO CHAN}?>
4647 * where {KHO KHWAI} = U+0E04
4648 * and {CHO CHAN} = U+0E08
4649 */
4650 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4651 /* <q/> */
4652 "\0<\0q\0/\0>";
4653 #ifdef XML_UNICODE
4654 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4655 #else
4656 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4657 #endif
4658 CharData storage;
4659
4660 CharData_Init(&storage);
4661 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4662 XML_SetUserData(g_parser, &storage);
4663 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4664 == XML_STATUS_ERROR)
4665 xml_failure(g_parser);
4666 CharData_CheckXMLChars(&storage, expected);
4667 }
4668 END_TEST
4669
4670 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4671 START_TEST(test_utf16_be_comment) {
4672 const char text[] =
4673 /* <!-- Comment A --> */
4674 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4675 /* <doc/> */
4676 "\0<\0d\0o\0c\0/\0>";
4677 const XML_Char *expected = XCS(" Comment A ");
4678 CharData storage;
4679
4680 CharData_Init(&storage);
4681 XML_SetCommentHandler(g_parser, accumulate_comment);
4682 XML_SetUserData(g_parser, &storage);
4683 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4684 == XML_STATUS_ERROR)
4685 xml_failure(g_parser);
4686 CharData_CheckXMLChars(&storage, expected);
4687 }
4688 END_TEST
4689
START_TEST(test_utf16_le_comment)4690 START_TEST(test_utf16_le_comment) {
4691 const char text[] =
4692 /* <!-- Comment B --> */
4693 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4694 /* <doc/> */
4695 "<\0d\0o\0c\0/\0>\0";
4696 const XML_Char *expected = XCS(" Comment B ");
4697 CharData storage;
4698
4699 CharData_Init(&storage);
4700 XML_SetCommentHandler(g_parser, accumulate_comment);
4701 XML_SetUserData(g_parser, &storage);
4702 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4703 == XML_STATUS_ERROR)
4704 xml_failure(g_parser);
4705 CharData_CheckXMLChars(&storage, expected);
4706 }
4707 END_TEST
4708
4709 /* Test that the unknown encoding handler with map entries that expect
4710 * conversion but no conversion function is faulted
4711 */
START_TEST(test_missing_encoding_conversion_fn)4712 START_TEST(test_missing_encoding_conversion_fn) {
4713 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4714 "<doc>\x81</doc>";
4715
4716 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4717 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4718 * character introducing a two-byte sequence. For this, it
4719 * requires a convert function. The above function call doesn't
4720 * pass one through, so when BadEncodingHandler actually gets
4721 * called it should supply an invalid encoding.
4722 */
4723 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4724 "Encoding with missing convert() not faulted");
4725 }
4726 END_TEST
4727
START_TEST(test_failing_encoding_conversion_fn)4728 START_TEST(test_failing_encoding_conversion_fn) {
4729 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4730 "<doc>\x81</doc>";
4731
4732 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4733 /* BadEncodingHandler sets up an encoding with every top-bit-set
4734 * character introducing a two-byte sequence. For this, it
4735 * requires a convert function. The above function call passes
4736 * one that insists all possible sequences are invalid anyway.
4737 */
4738 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4739 "Encoding with failing convert() not faulted");
4740 }
4741 END_TEST
4742
4743 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4744 START_TEST(test_unknown_encoding_success) {
4745 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4746 /* Equivalent to <eoc>Hello, world</eoc> */
4747 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4748
4749 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4750 run_character_check(text, XCS("Hello, world"));
4751 }
4752 END_TEST
4753
4754 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4755 START_TEST(test_unknown_encoding_bad_name) {
4756 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4757 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4758
4759 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4760 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4761 "Bad name start in unknown encoding not faulted");
4762 }
4763 END_TEST
4764
4765 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4766 START_TEST(test_unknown_encoding_bad_name_2) {
4767 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4768 "<d\xffoc>Hello, world</d\xffoc>";
4769
4770 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4771 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4772 "Bad name in unknown encoding not faulted");
4773 }
4774 END_TEST
4775
4776 /* Test element name that is long enough to fill the conversion buffer
4777 * in an unknown encoding, finishing with an encoded character.
4778 */
START_TEST(test_unknown_encoding_long_name_1)4779 START_TEST(test_unknown_encoding_long_name_1) {
4780 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4781 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4782 "Hi"
4783 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4784 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4785 CharData storage;
4786
4787 CharData_Init(&storage);
4788 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4789 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4790 XML_SetUserData(g_parser, &storage);
4791 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4792 == XML_STATUS_ERROR)
4793 xml_failure(g_parser);
4794 CharData_CheckXMLChars(&storage, expected);
4795 }
4796 END_TEST
4797
/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
 */
START_TEST(test_unknown_encoding_long_name_2)4801 START_TEST(test_unknown_encoding_long_name_2) {
4802 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4803 "<abcdefghabcdefghabcdefghijklmnop>"
4804 "Hi"
4805 "</abcdefghabcdefghabcdefghijklmnop>";
4806 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4807 CharData storage;
4808
4809 CharData_Init(&storage);
4810 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4811 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4812 XML_SetUserData(g_parser, &storage);
4813 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4814 == XML_STATUS_ERROR)
4815 xml_failure(g_parser);
4816 CharData_CheckXMLChars(&storage, expected);
4817 }
4818 END_TEST
4819
START_TEST(test_invalid_unknown_encoding)4820 START_TEST(test_invalid_unknown_encoding) {
4821 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4822 "<doc>Hello world</doc>";
4823
4824 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4825 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4826 "Invalid unknown encoding not faulted");
4827 }
4828 END_TEST
4829
START_TEST(test_unknown_ascii_encoding_ok)4830 START_TEST(test_unknown_ascii_encoding_ok) {
4831 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4832 "<doc>Hello, world</doc>";
4833
4834 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4835 run_character_check(text, XCS("Hello, world"));
4836 }
4837 END_TEST
4838
START_TEST(test_unknown_ascii_encoding_fail)4839 START_TEST(test_unknown_ascii_encoding_fail) {
4840 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4841 "<doc>Hello, \x80 world</doc>";
4842
4843 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4844 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4845 "Invalid character not faulted");
4846 }
4847 END_TEST
4848
START_TEST(test_unknown_encoding_invalid_length)4849 START_TEST(test_unknown_encoding_invalid_length) {
4850 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4851 "<doc>Hello, world</doc>";
4852
4853 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4854 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4855 "Invalid unknown encoding not faulted");
4856 }
4857 END_TEST
4858
START_TEST(test_unknown_encoding_invalid_topbit)4859 START_TEST(test_unknown_encoding_invalid_topbit) {
4860 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4861 "<doc>Hello, world</doc>";
4862
4863 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4864 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4865 "Invalid unknown encoding not faulted");
4866 }
4867 END_TEST
4868
START_TEST(test_unknown_encoding_invalid_surrogate)4869 START_TEST(test_unknown_encoding_invalid_surrogate) {
4870 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4871 "<doc>Hello, \x82 world</doc>";
4872
4873 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4874 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4875 "Invalid unknown encoding not faulted");
4876 }
4877 END_TEST
4878
START_TEST(test_unknown_encoding_invalid_high)4879 START_TEST(test_unknown_encoding_invalid_high) {
4880 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4881 "<doc>Hello, world</doc>";
4882
4883 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4884 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4885 "Invalid unknown encoding not faulted");
4886 }
4887 END_TEST
4888
START_TEST(test_unknown_encoding_invalid_attr_value)4889 START_TEST(test_unknown_encoding_invalid_attr_value) {
4890 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4891 "<doc attr='\xff\x30'/>";
4892
4893 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4894 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4895 "Invalid attribute valid not faulted");
4896 }
4897 END_TEST
4898
START_TEST(test_unknown_encoding_user_data_primary)4899 START_TEST(test_unknown_encoding_user_data_primary) {
4900 // This test is based on ideas contributed by Artiphishell Inc.
4901 const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n"
4902 "<root />\n";
4903 XML_Parser parser = XML_ParserCreate(NULL);
4904 XML_SetUnknownEncodingHandler(parser,
4905 user_data_checking_unknown_encoding_handler,
4906 (void *)(intptr_t)0xC0FFEE);
4907
4908 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
4909 == XML_STATUS_OK);
4910
4911 XML_ParserFree(parser);
4912 }
4913 END_TEST
4914
START_TEST(test_unknown_encoding_user_data_secondary)4915 START_TEST(test_unknown_encoding_user_data_secondary) {
4916 // This test is based on ideas contributed by Artiphishell Inc.
4917 const char *const text_main = "<!DOCTYPE r [\n"
4918 " <!ENTITY ext SYSTEM 'ext.ent'>\n"
4919 "]>\n"
4920 "<r>&ext;</r>\n";
4921 const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n"
4922 "<e>data</e>";
4923 ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL};
4924 XML_Parser parser = XML_ParserCreate(NULL);
4925 XML_SetExternalEntityRefHandler(parser, external_entity_loader2);
4926 XML_SetUnknownEncodingHandler(parser,
4927 user_data_checking_unknown_encoding_handler,
4928 (void *)(intptr_t)0xC0FFEE);
4929 XML_SetUserData(parser, &test_data);
4930
4931 assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main),
4932 XML_TRUE)
4933 == XML_STATUS_OK);
4934
4935 XML_ParserFree(parser);
4936 }
4937 END_TEST
4938
4939 /* Test an external entity parser set to use latin-1 detects UTF-16
4940 * BOMs correctly.
4941 */
4942 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4943 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4944 const char *text = "<!DOCTYPE doc [\n"
4945 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4946 "]>\n"
4947 "<doc>&en;</doc>";
4948 ExtTest2 test_data
4949 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4950 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4951 * 0x4c = L and 0x20 is a space
4952 */
4953 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4954 #ifdef XML_UNICODE
4955 const XML_Char *expected = XCS("\x00ff\x00feL ");
4956 #else
4957 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4958 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4959 #endif
4960 CharData storage;
4961
4962 CharData_Init(&storage);
4963 test_data.storage = &storage;
4964 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4965 XML_SetUserData(g_parser, &test_data);
4966 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4967 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4968 == XML_STATUS_ERROR)
4969 xml_failure(g_parser);
4970 CharData_CheckXMLChars(&storage, expected);
4971 }
4972 END_TEST
4973
START_TEST(test_ext_entity_latin1_utf16be_bom)4974 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4975 const char *text = "<!DOCTYPE doc [\n"
4976 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4977 "]>\n"
4978 "<doc>&en;</doc>";
4979 ExtTest2 test_data
4980 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4981 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4982 * 0x4c = L and 0x20 is a space
4983 */
4984 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4985 #ifdef XML_UNICODE
4986 const XML_Char *expected = XCS("\x00fe\x00ff L");
4987 #else
4988 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4989 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4990 #endif
4991 CharData storage;
4992
4993 CharData_Init(&storage);
4994 test_data.storage = &storage;
4995 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4996 XML_SetUserData(g_parser, &test_data);
4997 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4998 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4999 == XML_STATUS_ERROR)
5000 xml_failure(g_parser);
5001 CharData_CheckXMLChars(&storage, expected);
5002 }
5003 END_TEST
5004
5005 /* Parsing the full buffer rather than a byte at a time makes a
5006 * difference to the encoding scanning code, so repeat the above tests
5007 * without breaking them down by byte.
5008 */
START_TEST(test_ext_entity_latin1_utf16le_bom2)5009 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
5010 const char *text = "<!DOCTYPE doc [\n"
5011 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5012 "]>\n"
5013 "<doc>&en;</doc>";
5014 ExtTest2 test_data
5015 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
5016 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
5017 * 0x4c = L and 0x20 is a space
5018 */
5019 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
5020 #ifdef XML_UNICODE
5021 const XML_Char *expected = XCS("\x00ff\x00feL ");
5022 #else
5023 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
5024 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
5025 #endif
5026 CharData storage;
5027
5028 CharData_Init(&storage);
5029 test_data.storage = &storage;
5030 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5031 XML_SetUserData(g_parser, &test_data);
5032 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5033 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5034 == XML_STATUS_ERROR)
5035 xml_failure(g_parser);
5036 CharData_CheckXMLChars(&storage, expected);
5037 }
5038 END_TEST
5039
START_TEST(test_ext_entity_latin1_utf16be_bom2)5040 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
5041 const char *text = "<!DOCTYPE doc [\n"
5042 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5043 "]>\n"
5044 "<doc>&en;</doc>";
5045 ExtTest2 test_data
5046 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
5047 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
5048 * 0x4c = L and 0x20 is a space
5049 */
5050 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
5051 #ifdef XML_UNICODE
5052 const XML_Char *expected = XCS("\x00fe\x00ff L");
5053 #else
5054 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
5055 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
5056 #endif
5057 CharData storage;
5058
5059 CharData_Init(&storage);
5060 test_data.storage = &storage;
5061 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5062 XML_SetUserData(g_parser, &test_data);
5063 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5064 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5065 == XML_STATUS_ERROR)
5066 xml_failure(g_parser);
5067 CharData_CheckXMLChars(&storage, expected);
5068 }
5069 END_TEST
5070
5071 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)5072 START_TEST(test_ext_entity_utf16_be) {
5073 const char *text = "<!DOCTYPE doc [\n"
5074 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5075 "]>\n"
5076 "<doc>&en;</doc>";
5077 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
5078 #ifdef XML_UNICODE
5079 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
5080 #else
5081 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
5082 "\xe6\x94\x80" /* U+6500 */
5083 "\xe2\xbc\x80" /* U+2F00 */
5084 "\xe3\xb8\x80"); /* U+3E00 */
5085 #endif
5086 CharData storage;
5087
5088 CharData_Init(&storage);
5089 test_data.storage = &storage;
5090 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5091 XML_SetUserData(g_parser, &test_data);
5092 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5093 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5094 == XML_STATUS_ERROR)
5095 xml_failure(g_parser);
5096 CharData_CheckXMLChars(&storage, expected);
5097 }
5098 END_TEST
5099
5100 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)5101 START_TEST(test_ext_entity_utf16_le) {
5102 const char *text = "<!DOCTYPE doc [\n"
5103 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5104 "]>\n"
5105 "<doc>&en;</doc>";
5106 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
5107 #ifdef XML_UNICODE
5108 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
5109 #else
5110 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
5111 "\xe6\x94\x80" /* U+6500 */
5112 "\xe2\xbc\x80" /* U+2F00 */
5113 "\xe3\xb8\x80"); /* U+3E00 */
5114 #endif
5115 CharData storage;
5116
5117 CharData_Init(&storage);
5118 test_data.storage = &storage;
5119 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5120 XML_SetUserData(g_parser, &test_data);
5121 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5122 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5123 == XML_STATUS_ERROR)
5124 xml_failure(g_parser);
5125 CharData_CheckXMLChars(&storage, expected);
5126 }
5127 END_TEST
5128
5129 /* Test little-endian UTF-16 given no explicit encoding.
5130 * The existing default encoding (UTF-8) is assumed to hold without a
5131 * BOM to contradict it, so the entity value will in fact provoke an
5132 * error because 0x00 is not a valid XML character. We parse the
5133 * whole buffer in one go rather than feeding it in byte by byte to
5134 * exercise different code paths in the initial scanning routines.
5135 */
START_TEST(test_ext_entity_utf16_unknown)5136 START_TEST(test_ext_entity_utf16_unknown) {
5137 const char *text = "<!DOCTYPE doc [\n"
5138 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5139 "]>\n"
5140 "<doc>&en;</doc>";
5141 ExtFaults2 test_data
5142 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
5143 XML_ERROR_INVALID_TOKEN};
5144
5145 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
5146 XML_SetUserData(g_parser, &test_data);
5147 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5148 "Invalid character should not have been accepted");
5149 }
5150 END_TEST
5151
/* Test a three-byte sequence (0xEF 0xBB 0x80) that is almost, but not
 * quite, the UTF-8 BOM (0xEF 0xBB 0xBF)
 */
START_TEST(test_ext_entity_utf8_non_bom)5153 START_TEST(test_ext_entity_utf8_non_bom) {
5154 const char *text = "<!DOCTYPE doc [\n"
5155 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5156 "]>\n"
5157 "<doc>&en;</doc>";
5158 ExtTest2 test_data
5159 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
5160 3, NULL, NULL};
5161 #ifdef XML_UNICODE
5162 const XML_Char *expected = XCS("\xfec0");
5163 #else
5164 const XML_Char *expected = XCS("\xef\xbb\x80");
5165 #endif
5166 CharData storage;
5167
5168 CharData_Init(&storage);
5169 test_data.storage = &storage;
5170 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5171 XML_SetUserData(g_parser, &test_data);
5172 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5173 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5174 == XML_STATUS_ERROR)
5175 xml_failure(g_parser);
5176 CharData_CheckXMLChars(&storage, expected);
5177 }
5178 END_TEST
5179
5180 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)5181 START_TEST(test_utf8_in_cdata_section) {
5182 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
5183 #ifdef XML_UNICODE
5184 const XML_Char *expected = XCS("one \x00e9 two");
5185 #else
5186 const XML_Char *expected = XCS("one \xc3\xa9 two");
5187 #endif
5188
5189 run_character_check(text, expected);
5190 }
5191 END_TEST
5192
/* Test that UTF-8 characters on either side of a lone ']' inside a
 * CDATA section are correctly passed through
 */
START_TEST(test_utf8_in_cdata_section_2)5194 START_TEST(test_utf8_in_cdata_section_2) {
5195 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
5196 #ifdef XML_UNICODE
5197 const XML_Char *expected = XCS("\x00e9]\x00e9two");
5198 #else
5199 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
5200 #endif
5201
5202 run_character_check(text, expected);
5203 }
5204 END_TEST
5205
START_TEST(test_utf8_in_start_tags)5206 START_TEST(test_utf8_in_start_tags) {
5207 struct test_case {
5208 bool goodName;
5209 bool goodNameStart;
5210 const char *tagName;
5211 };
5212
5213 // The idea with the tests below is this:
5214 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
5215 // go to isNever and are hence not a concern.
5216 //
5217 // We start with a character that is a valid name character
5218 // (or even name-start character, see XML 1.0r4 spec) and then we flip
5219 // single bits at places where (1) the result leaves the UTF-8 encoding space
5220 // and (2) we stay in the same n-byte sequence family.
5221 //
5222 // The flipped bits are highlighted in angle brackets in comments,
5223 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
5224 // the most significant bit to 1 to leave UTF-8 encoding space.
5225 struct test_case cases[] = {
5226 // 1-byte UTF-8: [0xxx xxxx]
5227 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
5228 {false, false, "\xBA"}, // [<1>011 1010]
5229 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
5230 {false, false, "\xB9"}, // [<1>011 1001]
5231
5232 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
5233 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
5234 // Arabic small waw U+06E5
5235 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
5236 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
5237 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
5238 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
5239 // combining char U+0301
5240 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
5241 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
5242 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
5243
5244 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
5245 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
5246 // Devanagari Letter A U+0905
5247 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
5248 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
5249 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
5250 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
5251 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
5252 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
5253 // combining char U+0901
5254 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
5255 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
5256 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
5257 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
5258 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
5259 };
5260 const bool atNameStart[] = {true, false};
5261
5262 size_t i = 0;
5263 char doc[1024];
5264 size_t failCount = 0;
5265
5266 // we need all the bytes to be parsed, but we don't want the errors that can
5267 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
5268 if (g_reparseDeferralEnabledDefault) {
5269 return;
5270 }
5271
5272 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
5273 size_t j = 0;
5274 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
5275 const bool expectedSuccess
5276 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
5277 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
5278 cases[i].tagName);
5279 XML_Parser parser = XML_ParserCreate(NULL);
5280
5281 const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
5282 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
5283
5284 bool success = true;
5285 if ((status == XML_STATUS_OK) != expectedSuccess) {
5286 success = false;
5287 }
5288 if ((status == XML_STATUS_ERROR)
5289 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
5290 success = false;
5291 }
5292
5293 if (! success) {
5294 fprintf(
5295 stderr,
5296 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
5297 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
5298 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
5299 failCount++;
5300 }
5301
5302 XML_ParserFree(parser);
5303 }
5304 }
5305
5306 if (failCount > 0) {
5307 fail("UTF-8 regression detected");
5308 }
5309 }
5310 END_TEST
5311
5312 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)5313 START_TEST(test_trailing_spaces_in_elements) {
5314 const char *text = "<doc >Hi</doc >";
5315 const XML_Char *expected = XCS("doc/doc");
5316 CharData storage;
5317
5318 CharData_Init(&storage);
5319 XML_SetElementHandler(g_parser, record_element_start_handler,
5320 record_element_end_handler);
5321 XML_SetUserData(g_parser, &storage);
5322 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5323 == XML_STATUS_ERROR)
5324 xml_failure(g_parser);
5325 CharData_CheckXMLChars(&storage, expected);
5326 }
5327 END_TEST
5328
START_TEST(test_utf16_attribute)5329 START_TEST(test_utf16_attribute) {
5330 const char text[] =
5331 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
5332 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5333 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5334 */
5335 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
5336 const XML_Char *expected = XCS("a");
5337 CharData storage;
5338
5339 CharData_Init(&storage);
5340 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5341 XML_SetUserData(g_parser, &storage);
5342 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5343 == XML_STATUS_ERROR)
5344 xml_failure(g_parser);
5345 CharData_CheckXMLChars(&storage, expected);
5346 }
5347 END_TEST
5348
START_TEST(test_utf16_second_attr)5349 START_TEST(test_utf16_second_attr) {
5350 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
5351 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5352 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5353 */
5354 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
5355 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
5356 const XML_Char *expected = XCS("1");
5357 CharData storage;
5358
5359 CharData_Init(&storage);
5360 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5361 XML_SetUserData(g_parser, &storage);
5362 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5363 == XML_STATUS_ERROR)
5364 xml_failure(g_parser);
5365 CharData_CheckXMLChars(&storage, expected);
5366 }
5367 END_TEST
5368
START_TEST(test_attr_after_solidus)5369 START_TEST(test_attr_after_solidus) {
5370 const char *text = "<doc attr1='a' / attr2='b'>";
5371
5372 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
5373 }
5374 END_TEST
5375
START_TEST(test_utf16_pe)5376 START_TEST(test_utf16_pe) {
5377 /* <!DOCTYPE doc [
5378 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
5379 * %{KHO KHWAI}{CHO CHAN};
5380 * ]>
5381 * <doc></doc>
5382 *
5383 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5384 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5385 */
5386 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
5387 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
5388 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
5389 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
5390 "\0%\x0e\x04\x0e\x08\0;\0\n"
5391 "\0]\0>\0\n"
5392 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
5393 #ifdef XML_UNICODE
5394 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
5395 #else
5396 const XML_Char *expected
5397 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
5398 #endif
5399 CharData storage;
5400
5401 CharData_Init(&storage);
5402 XML_SetUserData(g_parser, &storage);
5403 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
5404 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5405 == XML_STATUS_ERROR)
5406 xml_failure(g_parser);
5407 CharData_CheckXMLChars(&storage, expected);
5408 }
5409 END_TEST
5410
5411 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)5412 START_TEST(test_bad_attr_desc_keyword) {
5413 const char *text = "<!DOCTYPE doc [\n"
5414 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
5415 "]>\n"
5416 "<doc />";
5417
5418 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5419 "Bad keyword !IMPLIED not faulted");
5420 }
5421 END_TEST
5422
/* Test that an invalid attribute description keyword consisting of
 * UTF-16 characters with their top bytes non-zero is correctly
 * faulted
 */
START_TEST(test_bad_attr_desc_keyword_utf16)5427 START_TEST(test_bad_attr_desc_keyword_utf16) {
5428 /* <!DOCTYPE d [
5429 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
5430 * ]><d/>
5431 *
5432 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5433 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5434 */
5435 const char text[]
5436 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5437 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
5438 "\0#\x0e\x04\x0e\x08\0>\0\n"
5439 "\0]\0>\0<\0d\0/\0>";
5440
5441 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5442 != XML_STATUS_ERROR)
5443 fail("Invalid UTF16 attribute keyword not faulted");
5444 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5445 xml_failure(g_parser);
5446 }
5447 END_TEST
5448
5449 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
5450 * using prefix-encoding (see above) to trigger specific code paths
5451 */
START_TEST(test_bad_doctype)5452 START_TEST(test_bad_doctype) {
5453 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
5454 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
5455
5456 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5457 expect_failure(text, XML_ERROR_SYNTAX,
5458 "Invalid bytes in DOCTYPE not faulted");
5459 }
5460 END_TEST
5461
START_TEST(test_bad_doctype_utf8)5462 START_TEST(test_bad_doctype_utf8) {
5463 const char *text = "<!DOCTYPE \xDB\x25"
5464 "doc><doc/>"; // [1101 1011] [<0>010 0101]
5465 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5466 "Invalid UTF-8 in DOCTYPE not faulted");
5467 }
5468 END_TEST
5469
START_TEST(test_bad_doctype_utf16)5470 START_TEST(test_bad_doctype_utf16) {
5471 const char text[] =
5472 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
5473 *
5474 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
5475 * (name character) but not a valid letter (name start character)
5476 */
5477 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
5478 "\x06\xf2"
5479 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
5480
5481 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5482 != XML_STATUS_ERROR)
5483 fail("Invalid bytes in DOCTYPE not faulted");
5484 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5485 xml_failure(g_parser);
5486 }
5487 END_TEST
5488
START_TEST(test_bad_doctype_plus)5489 START_TEST(test_bad_doctype_plus) {
5490 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
5491 "<1+>&foo;</1+>";
5492
5493 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5494 "'+' in document name not faulted");
5495 }
5496 END_TEST
5497
START_TEST(test_bad_doctype_star)5498 START_TEST(test_bad_doctype_star) {
5499 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
5500 "<1*>&foo;</1*>";
5501
5502 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5503 "'*' in document name not faulted");
5504 }
5505 END_TEST
5506
START_TEST(test_bad_doctype_query)5507 START_TEST(test_bad_doctype_query) {
5508 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5509 "<1?>&foo;</1?>";
5510
5511 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5512 "'?' in document name not faulted");
5513 }
5514 END_TEST
5515
START_TEST(test_unknown_encoding_bad_ignore)5516 START_TEST(test_unknown_encoding_bad_ignore) {
5517 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5518 "<!DOCTYPE doc SYSTEM 'foo'>"
5519 "<doc><e>&entity;</e></doc>";
5520 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5521 "Invalid character not faulted", XCS("prefix-conv"),
5522 XML_ERROR_INVALID_TOKEN};
5523
5524 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5525 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5526 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5527 XML_SetUserData(g_parser, &fault);
5528 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5529 "Bad IGNORE section with unknown encoding not failed");
5530 }
5531 END_TEST
5532
START_TEST(test_entity_in_utf16_be_attr)5533 START_TEST(test_entity_in_utf16_be_attr) {
5534 const char text[] =
5535 /* <e a='ä ä'></e> */
5536 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5537 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5538 #ifdef XML_UNICODE
5539 const XML_Char *expected = XCS("\x00e4 \x00e4");
5540 #else
5541 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5542 #endif
5543 CharData storage;
5544
5545 CharData_Init(&storage);
5546 XML_SetUserData(g_parser, &storage);
5547 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5548 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5549 == XML_STATUS_ERROR)
5550 xml_failure(g_parser);
5551 CharData_CheckXMLChars(&storage, expected);
5552 }
5553 END_TEST
5554
START_TEST(test_entity_in_utf16_le_attr)5555 START_TEST(test_entity_in_utf16_le_attr) {
5556 const char text[] =
5557 /* <e a='ä ä'></e> */
5558 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5559 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5560 #ifdef XML_UNICODE
5561 const XML_Char *expected = XCS("\x00e4 \x00e4");
5562 #else
5563 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5564 #endif
5565 CharData storage;
5566
5567 CharData_Init(&storage);
5568 XML_SetUserData(g_parser, &storage);
5569 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5571 == XML_STATUS_ERROR)
5572 xml_failure(g_parser);
5573 CharData_CheckXMLChars(&storage, expected);
5574 }
5575 END_TEST
5576
START_TEST(test_entity_public_utf16_be)5577 START_TEST(test_entity_public_utf16_be) {
5578 const char text[] =
5579 /* <!DOCTYPE d [ */
5580 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5581 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5582 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5583 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5584 /* %e; */
5585 "\0%\0e\0;\0\n"
5586 /* ]> */
5587 "\0]\0>\0\n"
5588 /* <d>&j;</d> */
5589 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5590 ExtTest2 test_data
5591 = {/* <!ENTITY j 'baz'> */
5592 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5593 const XML_Char *expected = XCS("baz");
5594 CharData storage;
5595
5596 CharData_Init(&storage);
5597 test_data.storage = &storage;
5598 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5599 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5600 XML_SetUserData(g_parser, &test_data);
5601 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5602 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5603 == XML_STATUS_ERROR)
5604 xml_failure(g_parser);
5605 CharData_CheckXMLChars(&storage, expected);
5606 }
5607 END_TEST
5608
START_TEST(test_entity_public_utf16_le)5609 START_TEST(test_entity_public_utf16_le) {
5610 const char text[] =
5611 /* <!DOCTYPE d [ */
5612 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5613 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5614 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5615 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5616 /* %e; */
5617 "%\0e\0;\0\n\0"
5618 /* ]> */
5619 "]\0>\0\n\0"
5620 /* <d>&j;</d> */
5621 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5622 ExtTest2 test_data
5623 = {/* <!ENTITY j 'baz'> */
5624 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5625 const XML_Char *expected = XCS("baz");
5626 CharData storage;
5627
5628 CharData_Init(&storage);
5629 test_data.storage = &storage;
5630 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5631 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5632 XML_SetUserData(g_parser, &test_data);
5633 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5634 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5635 == XML_STATUS_ERROR)
5636 xml_failure(g_parser);
5637 CharData_CheckXMLChars(&storage, expected);
5638 }
5639 END_TEST
5640
5641 /* Test that a doctype with neither an internal nor external subset is
5642 * faulted
5643 */
START_TEST(test_short_doctype)5644 START_TEST(test_short_doctype) {
5645 const char *text = "<!DOCTYPE doc></doc>";
5646 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5647 "DOCTYPE without subset not rejected");
5648 }
5649 END_TEST
5650
START_TEST(test_short_doctype_2)5651 START_TEST(test_short_doctype_2) {
5652 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5653 expect_failure(text, XML_ERROR_SYNTAX,
5654 "DOCTYPE without Public ID not rejected");
5655 }
5656 END_TEST
5657
START_TEST(test_short_doctype_3)5658 START_TEST(test_short_doctype_3) {
5659 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5660 expect_failure(text, XML_ERROR_SYNTAX,
5661 "DOCTYPE without System ID not rejected");
5662 }
5663 END_TEST
5664
START_TEST(test_long_doctype)5665 START_TEST(test_long_doctype) {
5666 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5667 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5668 }
5669 END_TEST
5670
START_TEST(test_bad_entity)5671 START_TEST(test_bad_entity) {
5672 const char *text = "<!DOCTYPE doc [\n"
5673 " <!ENTITY foo PUBLIC>\n"
5674 "]>\n"
5675 "<doc/>";
5676 expect_failure(text, XML_ERROR_SYNTAX,
5677 "ENTITY without Public ID is not rejected");
5678 }
5679 END_TEST
5680
5681 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5682 START_TEST(test_bad_entity_2) {
5683 const char *text = "<!DOCTYPE doc [\n"
5684 " <!ENTITY % foo bar>\n"
5685 "]>\n"
5686 "<doc/>";
5687 expect_failure(text, XML_ERROR_SYNTAX,
5688 "ENTITY without Public ID is not rejected");
5689 }
5690 END_TEST
5691
START_TEST(test_bad_entity_3)5692 START_TEST(test_bad_entity_3) {
5693 const char *text = "<!DOCTYPE doc [\n"
5694 " <!ENTITY % foo PUBLIC>\n"
5695 "]>\n"
5696 "<doc/>";
5697 expect_failure(text, XML_ERROR_SYNTAX,
5698 "Parameter ENTITY without Public ID is not rejected");
5699 }
5700 END_TEST
5701
START_TEST(test_bad_entity_4)5702 START_TEST(test_bad_entity_4) {
5703 const char *text = "<!DOCTYPE doc [\n"
5704 " <!ENTITY % foo SYSTEM>\n"
5705 "]>\n"
5706 "<doc/>";
5707 expect_failure(text, XML_ERROR_SYNTAX,
5708 "Parameter ENTITY without Public ID is not rejected");
5709 }
5710 END_TEST
5711
START_TEST(test_bad_notation)5712 START_TEST(test_bad_notation) {
5713 const char *text = "<!DOCTYPE doc [\n"
5714 " <!NOTATION n SYSTEM>\n"
5715 "]>\n"
5716 "<doc/>";
5717 expect_failure(text, XML_ERROR_SYNTAX,
5718 "Notation without System ID is not rejected");
5719 }
5720 END_TEST
5721
5722 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5723 START_TEST(test_default_doctype_handler) {
5724 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5725 " <!ENTITY foo 'bar'>\n"
5726 "]>\n"
5727 "<doc>&foo;</doc>";
5728 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5729 {XCS("'test.dtd'"), 10, XML_FALSE},
5730 {NULL, 0, XML_FALSE}};
5731 int i;
5732
5733 XML_SetUserData(g_parser, &test_data);
5734 XML_SetDefaultHandler(g_parser, checking_default_handler);
5735 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5736 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5737 == XML_STATUS_ERROR)
5738 xml_failure(g_parser);
5739 for (i = 0; test_data[i].expected != NULL; i++)
5740 if (! test_data[i].seen)
5741 fail("Default handler not run for public !DOCTYPE");
5742 }
5743 END_TEST
5744
START_TEST(test_empty_element_abort)5745 START_TEST(test_empty_element_abort) {
5746 const char *text = "<abort/>";
5747
5748 XML_SetStartElementHandler(g_parser, start_element_suspender);
5749 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5750 != XML_STATUS_ERROR)
5751 fail("Expected to error on abort");
5752 }
5753 END_TEST
5754
5755 /* Regression test for GH issue #612: unfinished m_declAttributeType
5756 * allocation in ->m_tempPool can corrupt following allocation.
5757 */
START_TEST(test_pool_integrity_with_unfinished_attr)5758 START_TEST(test_pool_integrity_with_unfinished_attr) {
5759 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5760 "<!DOCTYPE foo [\n"
5761 "<!ELEMENT foo ANY>\n"
5762 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5763 "%entp;\n"
5764 "]>\n"
5765 "<a></a>\n";
5766 const XML_Char *expected = XCS("COMMENT");
5767 CharData storage;
5768
5769 CharData_Init(&storage);
5770 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5771 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5772 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5773 XML_SetCommentHandler(g_parser, accumulate_comment);
5774 XML_SetUserData(g_parser, &storage);
5775 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5776 == XML_STATUS_ERROR)
5777 xml_failure(g_parser);
5778 CharData_CheckXMLChars(&storage, expected);
5779 }
5780 END_TEST
5781
5782 /* Test a possible early return location in internalEntityProcessor */
START_TEST(test_entity_ref_no_elements)5783 START_TEST(test_entity_ref_no_elements) {
5784 const char *const text = "<!DOCTYPE foo [\n"
5785 "<!ENTITY e1 \"test\">\n"
5786 "]> <foo>&e1;"; // intentionally missing newline
5787
5788 XML_Parser parser = XML_ParserCreate(NULL);
5789 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5790 == XML_STATUS_ERROR);
5791 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
5792 XML_ParserFree(parser);
5793 }
5794 END_TEST
5795
5796 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5797 START_TEST(test_deep_nested_entity) {
5798 const size_t N_LINES = 60000;
5799 const size_t SIZE_PER_LINE = 50;
5800
5801 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5802 if (text == NULL) {
5803 fail("malloc failed");
5804 }
5805
5806 char *textPtr = text;
5807
5808 // Create the XML
5809 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5810 "<!DOCTYPE foo [\n"
5811 " <!ENTITY s0 'deepText'>\n");
5812
5813 for (size_t i = 1; i < N_LINES; ++i) {
5814 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5815 (long unsigned)i, (long unsigned)(i - 1));
5816 }
5817
5818 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5819 (long unsigned)(N_LINES - 1));
5820
5821 const XML_Char *const expected = XCS("deepText");
5822
5823 CharData storage;
5824 CharData_Init(&storage);
5825
5826 XML_Parser parser = XML_ParserCreate(NULL);
5827
5828 XML_SetCharacterDataHandler(parser, accumulate_characters);
5829 XML_SetUserData(parser, &storage);
5830
5831 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5832 == XML_STATUS_ERROR)
5833 xml_failure(parser);
5834
5835 CharData_CheckXMLChars(&storage, expected);
5836 XML_ParserFree(parser);
5837 free(text);
5838 }
5839 END_TEST
5840
5841 /* Tests if chained entity references in attributes
5842 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5843 START_TEST(test_deep_nested_attribute_entity) {
5844 const size_t N_LINES = 60000;
5845 const size_t SIZE_PER_LINE = 100;
5846
5847 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5848 if (text == NULL) {
5849 fail("malloc failed");
5850 }
5851
5852 char *textPtr = text;
5853
5854 // Create the XML
5855 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5856 "<!DOCTYPE foo [\n"
5857 " <!ENTITY s0 'deepText'>\n");
5858
5859 for (size_t i = 1; i < N_LINES; ++i) {
5860 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5861 (long unsigned)i, (long unsigned)(i - 1));
5862 }
5863
5864 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5865 (long unsigned)(N_LINES - 1));
5866
5867 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5868 ElementInfo info[]
5869 = {{XCS("foo"), 1, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
5870
5871 XML_Parser parser = XML_ParserCreate(NULL);
5872 ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5873
5874 XML_SetStartElementHandler(parser, counting_start_element_handler);
5875 XML_SetUserData(parser, &parserPlusElemenInfo);
5876
5877 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5878 == XML_STATUS_ERROR)
5879 xml_failure(parser);
5880
5881 XML_ParserFree(parser);
5882 free(text);
5883 }
5884 END_TEST
5885
START_TEST(test_deep_nested_entity_delayed_interpretation)5886 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5887 const size_t N_LINES = 70000;
5888 const size_t SIZE_PER_LINE = 100;
5889
5890 char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5891 if (text == NULL) {
5892 fail("malloc failed");
5893 }
5894
5895 char *textPtr = text;
5896
5897 // Create the XML
5898 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5899 "<!DOCTYPE foo [\n"
5900 " <!ENTITY %% s0 'deepText'>\n");
5901
5902 for (size_t i = 1; i < N_LINES; ++i) {
5903 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5904 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i,
5905 (long unsigned)(i - 1));
5906 }
5907
5908 snprintf(textPtr, SIZE_PER_LINE,
5909 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n"
5910 " %%define_g;\n"
5911 "]>\n"
5912 "<foo/>\n",
5913 (long unsigned)(N_LINES - 1));
5914
5915 XML_Parser parser = XML_ParserCreate(NULL);
5916
5917 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5918 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5919 == XML_STATUS_ERROR)
5920 xml_failure(parser);
5921
5922 XML_ParserFree(parser);
5923 free(text);
5924 }
5925 END_TEST
5926
START_TEST(test_nested_entity_suspend)5927 START_TEST(test_nested_entity_suspend) {
5928 const char *const text = "<!DOCTYPE a [\n"
5929 " <!ENTITY e1 '<!--e1-->'>\n"
5930 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5931 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5932 "]>\n"
5933 "<a><!--start-->&e3;<!--end--></a>";
5934 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5935 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5936 CharData storage;
5937 CharData_Init(&storage);
5938 XML_Parser parser = XML_ParserCreate(NULL);
5939 ParserPlusStorage parserPlusStorage = {parser, &storage};
5940
5941 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5942 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5943 XML_SetUserData(parser, &parserPlusStorage);
5944
5945 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5946 while (status == XML_STATUS_SUSPENDED) {
5947 status = XML_ResumeParser(parser);
5948 }
5949 if (status != XML_STATUS_OK)
5950 xml_failure(parser);
5951
5952 CharData_CheckXMLChars(&storage, expected);
5953 XML_ParserFree(parser);
5954 }
5955 END_TEST
5956
START_TEST(test_nested_entity_suspend_2)5957 START_TEST(test_nested_entity_suspend_2) {
5958 const char *const text = "<!DOCTYPE doc [\n"
5959 " <!ENTITY ge1 'head1Ztail1'>\n"
5960 " <!ENTITY ge2 'head2&ge1;tail2'>\n"
5961 " <!ENTITY ge3 'head3&ge2;tail3'>\n"
5962 "]>\n"
5963 "<doc>&ge3;</doc>";
5964 const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5965 XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5966 CharData storage;
5967 CharData_Init(&storage);
5968 XML_Parser parser = XML_ParserCreate(NULL);
5969 ParserPlusStorage parserPlusStorage = {parser, &storage};
5970
5971 XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5972 XML_SetUserData(parser, &parserPlusStorage);
5973
5974 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5975 while (status == XML_STATUS_SUSPENDED) {
5976 status = XML_ResumeParser(parser);
5977 }
5978 if (status != XML_STATUS_OK)
5979 xml_failure(parser);
5980
5981 CharData_CheckXMLChars(&storage, expected);
5982 XML_ParserFree(parser);
5983 }
5984 END_TEST
5985
5986 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5987 START_TEST(test_big_tokens_scale_linearly) {
5988 const struct {
5989 const char *pre;
5990 const char *post;
5991 } text[] = {
5992 {"<a>", "</a>"}, // assumed good, used as baseline
5993 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5994 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
5995 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
5996 {"<e><", "/></e>"}, // big elem name, used to be O(N²)
5997 };
5998 const int num_cases = sizeof(text) / sizeof(text[0]);
5999 char aaaaaa[4096];
6000 const int fillsize = (int)sizeof(aaaaaa);
6001 const int fillcount = 100;
6002 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
6003 const unsigned max_factor = 4;
6004 const unsigned max_scanned = max_factor * approx_bytes;
6005
6006 memset(aaaaaa, 'a', fillsize);
6007
6008 if (! g_reparseDeferralEnabledDefault) {
6009 return; // heuristic is disabled; we would get O(n^2) and fail.
6010 }
6011
6012 for (int i = 0; i < num_cases; ++i) {
6013 XML_Parser parser = XML_ParserCreate(NULL);
6014 assert_true(parser != NULL);
6015 enum XML_Status status;
6016 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
6017
6018 // parse the start text
6019 g_bytesScanned = 0;
6020 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
6021 (int)strlen(text[i].pre), XML_FALSE);
6022 if (status != XML_STATUS_OK) {
6023 xml_failure(parser);
6024 }
6025
6026 // parse lots of 'a', failing the test early if it takes too long
6027 unsigned past_max_count = 0;
6028 for (int f = 0; f < fillcount; ++f) {
6029 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
6030 if (status != XML_STATUS_OK) {
6031 xml_failure(parser);
6032 }
6033 if (g_bytesScanned > max_scanned) {
6034 // We're not done, and have already passed the limit -- the test will
6035 // definitely fail. This block allows us to save time by failing early.
6036 const unsigned pushed
6037 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
6038 fprintf(
6039 stderr,
6040 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
6041 f + 1, fillcount, pushed, g_bytesScanned,
6042 g_bytesScanned / (double)pushed, max_scanned, max_factor);
6043 past_max_count++;
6044 // We are failing, but allow a few log prints first. If we don't reach
6045 // a count of five, the test will fail after the loop instead.
6046 assert_true(past_max_count < 5);
6047 }
6048 }
6049
6050 // parse the end text
6051 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
6052 (int)strlen(text[i].post), XML_TRUE);
6053 if (status != XML_STATUS_OK) {
6054 xml_failure(parser);
6055 }
6056
6057 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
6058 if (g_bytesScanned > max_scanned) {
6059 fprintf(
6060 stderr,
6061 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
6062 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
6063 max_factor);
6064 fail("scanned too many bytes");
6065 }
6066
6067 XML_ParserFree(parser);
6068 }
6069 }
6070 END_TEST
6071
START_TEST(test_set_reparse_deferral)6072 START_TEST(test_set_reparse_deferral) {
6073 const char *const pre = "<d>";
6074 const char *const start = "<x attr='";
6075 const char *const end = "'></x>";
6076 char eeeeee[100];
6077 const int fillsize = (int)sizeof(eeeeee);
6078 memset(eeeeee, 'e', fillsize);
6079
6080 for (int enabled = 0; enabled <= 1; enabled += 1) {
6081 set_subtest("deferral=%d", enabled);
6082
6083 XML_Parser parser = XML_ParserCreate(NULL);
6084 assert_true(parser != NULL);
6085 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
6086 // pre-grow the buffer to avoid reparsing due to almost-fullness
6087 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
6088
6089 CharData storage;
6090 CharData_Init(&storage);
6091 XML_SetUserData(parser, &storage);
6092 XML_SetStartElementHandler(parser, start_element_event_handler);
6093
6094 enum XML_Status status;
6095 // parse the start text
6096 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6097 if (status != XML_STATUS_OK) {
6098 xml_failure(parser);
6099 }
6100 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6101
6102 // ..and the start of the token
6103 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
6104 if (status != XML_STATUS_OK) {
6105 xml_failure(parser);
6106 }
6107 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
6108
6109 // try to parse lots of 'e', but the token isn't finished
6110 for (int c = 0; c < 100; ++c) {
6111 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6112 if (status != XML_STATUS_OK) {
6113 xml_failure(parser);
6114 }
6115 }
6116 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6117
6118 // end the <x> token.
6119 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6120 if (status != XML_STATUS_OK) {
6121 xml_failure(parser);
6122 }
6123
6124 if (enabled) {
6125 // In general, we may need to push more data to trigger a reparse attempt,
6126 // but in this test, the data is constructed to always require it.
6127 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
6128 // 2x the token length should suffice; the +1 covers the start and end.
6129 for (int c = 0; c < 101; ++c) {
6130 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6131 if (status != XML_STATUS_OK) {
6132 xml_failure(parser);
6133 }
6134 }
6135 }
6136 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
6137
6138 XML_ParserFree(parser);
6139 }
6140 }
6141 END_TEST
6142
/* User data for element_decl_counter(): the parser that owns the content
 * models, plus a running count of element declaration callbacks.
 */
struct element_decl_data {
  XML_Parser parser; // passed to XML_FreeContentModel() by the handler
  int count;         // incremented by one on every handler invocation
};
6147
6148 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)6149 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
6150 UNUSED_P(name);
6151 struct element_decl_data *testdata = (struct element_decl_data *)userData;
6152 testdata->count += 1;
6153 XML_FreeContentModel(testdata->parser, model);
6154 }
6155
6156 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)6157 external_inherited_parser(XML_Parser p, const XML_Char *context,
6158 const XML_Char *base, const XML_Char *systemId,
6159 const XML_Char *publicId) {
6160 UNUSED_P(base);
6161 UNUSED_P(systemId);
6162 UNUSED_P(publicId);
6163 const char *const pre = "<!ELEMENT document ANY>\n";
6164 const char *const start = "<!ELEMENT ";
6165 const char *const end = " ANY>\n";
6166 const char *const post = "<!ELEMENT xyz ANY>\n";
6167 const int enabled = *(int *)XML_GetUserData(p);
6168 char eeeeee[100];
6169 char spaces[100];
6170 const int fillsize = (int)sizeof(eeeeee);
6171 assert_true(fillsize == (int)sizeof(spaces));
6172 memset(eeeeee, 'e', fillsize);
6173 memset(spaces, ' ', fillsize);
6174
6175 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
6176 assert_true(parser != NULL);
6177 // pre-grow the buffer to avoid reparsing due to almost-fullness
6178 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
6179
6180 struct element_decl_data testdata;
6181 testdata.parser = parser;
6182 testdata.count = 0;
6183 XML_SetUserData(parser, &testdata);
6184 XML_SetElementDeclHandler(parser, element_decl_counter);
6185
6186 enum XML_Status status;
6187 // parse the initial text
6188 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6189 if (status != XML_STATUS_OK) {
6190 xml_failure(parser);
6191 }
6192 assert_true(testdata.count == 1); // first element should be done
6193
6194 // ..and the start of the big token
6195 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
6196 if (status != XML_STATUS_OK) {
6197 xml_failure(parser);
6198 }
6199 assert_true(testdata.count == 1); // still just the first one
6200
6201 // try to parse lots of 'e', but the token isn't finished
6202 for (int c = 0; c < 100; ++c) {
6203 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6204 if (status != XML_STATUS_OK) {
6205 xml_failure(parser);
6206 }
6207 }
6208 assert_true(testdata.count == 1); // *still* just the first one
6209
6210 // end the big token.
6211 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6212 if (status != XML_STATUS_OK) {
6213 xml_failure(parser);
6214 }
6215
6216 if (enabled) {
6217 // In general, we may need to push more data to trigger a reparse attempt,
6218 // but in this test, the data is constructed to always require it.
6219 assert_true(testdata.count == 1); // or the test is incorrect
6220 // 2x the token length should suffice; the +1 covers the start and end.
6221 for (int c = 0; c < 101; ++c) {
6222 status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
6223 if (status != XML_STATUS_OK) {
6224 xml_failure(parser);
6225 }
6226 }
6227 }
6228 assert_true(testdata.count == 2); // the big token should be done
6229
6230 // parse the final text
6231 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
6232 if (status != XML_STATUS_OK) {
6233 xml_failure(parser);
6234 }
6235 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
6236
6237 XML_ParserFree(parser);
6238 return XML_STATUS_OK;
6239 }
6240
START_TEST(test_reparse_deferral_is_inherited)6241 START_TEST(test_reparse_deferral_is_inherited) {
6242 const char *const text
6243 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
6244 for (int enabled = 0; enabled <= 1; ++enabled) {
6245 set_subtest("deferral=%d", enabled);
6246
6247 XML_Parser parser = XML_ParserCreate(NULL);
6248 assert_true(parser != NULL);
6249 XML_SetUserData(parser, (void *)&enabled);
6250 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6251 // this handler creates a sub-parser and checks that its deferral behavior
6252 // is what we expected, based on the value of `enabled` (in userdata).
6253 XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
6254 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
6255 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
6256 xml_failure(parser);
6257
6258 XML_ParserFree(parser);
6259 }
6260 }
6261 END_TEST
6262
START_TEST(test_set_reparse_deferral_on_null_parser)6263 START_TEST(test_set_reparse_deferral_on_null_parser) {
6264 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
6265 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
6266 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
6267 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
6268 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
6269 == XML_FALSE);
6270 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
6271 == XML_FALSE);
6272 }
6273 END_TEST
6274
START_TEST(test_set_reparse_deferral_on_the_fly)6275 START_TEST(test_set_reparse_deferral_on_the_fly) {
6276 const char *const pre = "<d><x attr='";
6277 const char *const end = "'></x>";
6278 char iiiiii[100];
6279 const int fillsize = (int)sizeof(iiiiii);
6280 memset(iiiiii, 'i', fillsize);
6281
6282 XML_Parser parser = XML_ParserCreate(NULL);
6283 assert_true(parser != NULL);
6284 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
6285
6286 CharData storage;
6287 CharData_Init(&storage);
6288 XML_SetUserData(parser, &storage);
6289 XML_SetStartElementHandler(parser, start_element_event_handler);
6290
6291 enum XML_Status status;
6292 // parse the start text
6293 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6294 if (status != XML_STATUS_OK) {
6295 xml_failure(parser);
6296 }
6297 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6298
6299 // try to parse some 'i', but the token isn't finished
6300 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
6301 if (status != XML_STATUS_OK) {
6302 xml_failure(parser);
6303 }
6304 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6305
6306 // end the <x> token.
6307 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6308 if (status != XML_STATUS_OK) {
6309 xml_failure(parser);
6310 }
6311 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
6312
6313 // now change the heuristic setting and add *no* data
6314 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
6315 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
6316 status = XML_Parse(parser, "", 0, XML_FALSE);
6317 if (status != XML_STATUS_OK) {
6318 xml_failure(parser);
6319 }
6320 CharData_CheckXMLChars(&storage, XCS("dx"));
6321
6322 XML_ParserFree(parser);
6323 }
6324 END_TEST
6325
START_TEST(test_set_bad_reparse_option)6326 START_TEST(test_set_bad_reparse_option) {
6327 XML_Parser parser = XML_ParserCreate(NULL);
6328 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
6329 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
6330 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
6331 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
6332 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
6333 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
6334 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
6335 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
6336 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
6337 XML_ParserFree(parser);
6338 }
6339 END_TEST
6340
/* Allocation statistics maintained by counting_realloc(): the sum of all
 * requested sizes, and the largest single request seen so far. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in the statistics above
 * before forwarding the call.  The size is counted whether or not the
 * underlying realloc() succeeds.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}
6352
/* malloc() replacement expressed through counting_realloc() with a NULL
 * source pointer, so the request goes through the same accounting path.
 */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
6357
/* For every combination of leading-content size, big-token size and fill
 * size, builds a document consisting of an optional "<a>" tag, filler, and
 * one large "<b   ...>" token, then feeds it in fixed-size fills through a
 * parser created with allocation-counting memory functions.  Checks that a
 * buffer big enough for the token (plus context) was allocated, and that
 * once the whole token has been pushed, finishing its parse needs no
 * further large allocation.
 */
START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
  if (g_chunkSize != 0) {
    // this test does not use SINGLE_BYTES, because it depends on very precise
    // buffer fills.
    return;
  }
  if (! g_reparseDeferralEnabledDefault) {
    return; // this test is irrelevant when the deferral heuristic is disabled.
  }

  const int document_length = 65536;
  char *const document = malloc(document_length);
  assert_true(document != NULL);

  // route all parser allocations through the counting wrappers
  const XML_Memory_Handling_Suite memfuncs = {
      counting_malloc,
      counting_realloc,
      free,
  };

  // each list is terminated by a negative value
  const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
  const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
  const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};

  for (const int *leading = leading_list; *leading >= 0; leading++) {
    for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
      for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
        set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
                    *fillsize);
        // start by checking that the test looks reasonably valid
        assert_true(*leading + *bigtoken <= document_length);

        // put 'x' everywhere; some will be overwritten by elements.
        memset(document, 'x', document_length);
        // maybe add an initial tag
        if (*leading) {
          assert_true(*leading >= 3); // or the test case is invalid
          memcpy(document, "<a>", 3);
        }
        // add the large token
        document[*leading + 0] = '<';
        document[*leading + 1] = 'b';
        memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
        document[*leading + *bigtoken - 1] = '>';

        // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
        const int expected_elem_total = 1 + (*leading ? 1 : 0);

        XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
        assert_true(parser != NULL);

        CharData storage;
        CharData_Init(&storage);
        XML_SetUserData(parser, &storage);
        XML_SetStartElementHandler(parser, start_element_event_handler);

        // restart the allocation statistics for this combination
        g_biggestAlloc = 0;
        g_totalAlloc = 0;
        int offset = 0;
        // fill data until the big token is covered (but not necessarily parsed)
        while (offset < *leading + *bigtoken) {
          assert_true(offset + *fillsize <= document_length);
          const enum XML_Status status
              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
          if (status != XML_STATUS_OK) {
            xml_failure(parser);
          }
          offset += *fillsize;
        }
        // Now, check that we've had a buffer allocation that could fit the
        // context bytes and our big token. In order to detect a special case,
        // we need to know how many bytes of our big token were included in the
        // first push that contained _any_ bytes of the big token:
        const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
        if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
          // Special case: we aren't saving any context, and the whole big token
          // was covered by a single fill, so Expat may have parsed directly
          // from our input pointer, without allocating an internal buffer.
        } else if (*leading < XML_CONTEXT_BYTES) {
          // fewer leading bytes than the context size: all of them count
          assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
        } else {
          assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
        }
        // fill data until the big token is actually parsed
        while (storage.count < expected_elem_total) {
          const size_t alloc_before = g_totalAlloc;
          assert_true(offset + *fillsize <= document_length);
          const enum XML_Status status
              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
          if (status != XML_STATUS_OK) {
            xml_failure(parser);
          }
          offset += *fillsize;
          // since all the bytes of the big token are already in the buffer,
          // the bufsize ceiling should make us finish its parsing without any
          // further buffer allocations. We assume that there will be no other
          // large allocations in this test.
          assert_true(g_totalAlloc - alloc_before < 4096);
        }
        // test-the-test: was our alloc even called?
        assert_true(g_totalAlloc > 0);
        // test-the-test: there shouldn't be any extra start elements
        assert_true(storage.count == expected_elem_total);

        XML_ParserFree(parser);
      }
    }
  }
  free(document);
}
END_TEST
6469
START_TEST(test_varying_buffer_fills)6470 START_TEST(test_varying_buffer_fills) {
6471 const int KiB = 1024;
6472 const int MiB = 1024 * KiB;
6473 const int document_length = 16 * MiB;
6474 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
6475
6476 if (g_chunkSize != 0) {
6477 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
6478 }
6479
6480 char *const document = malloc(document_length);
6481 assert_true(document != NULL);
6482 memset(document, 'x', document_length);
6483 document[0] = '<';
6484 document[1] = 't';
6485 memset(&document[2], ' ', big - 2); // a very spacy token
6486 document[big - 1] = '>';
6487
6488 // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
6489 // When reparse deferral is enabled, the final (negated) value is the expected
6490 // maximum number of bytes scanned in parse attempts.
6491 const int testcases[][30] = {
6492 {8 * MiB, -8 * MiB},
6493 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
6494 // zero-size fills shouldn't trigger the bypass
6495 {4 * MiB, 0, 4 * MiB, -12 * MiB},
6496 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
6497 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
6498 // try to hit the buffer ceiling only once (at the end)
6499 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
6500 // try to hit the same buffer ceiling multiple times
6501 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
6502
6503 // try to hit every ceiling, by always landing 1K shy of the buffer size
6504 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
6505 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
6506
6507 // try to avoid every ceiling, by always landing 1B past the buffer size
6508 // the normal 2x heuristic threshold still forces parse attempts.
6509 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6510 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6511 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6512 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6513 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6514 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6515 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6516 -(10 * MiB + 682 * KiB + 7)},
6517 // try to avoid every ceiling again, except on our last fill.
6518 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6519 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6520 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6521 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6522 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6523 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6524 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6525 -(10 * MiB + 682 * KiB + 6)},
6526
6527 // try to hit ceilings on the way multiple times
6528 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6529 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6530 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
6531 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
6532 // we'll make a parse attempt at every parse call
6533 -(45 * MiB + 12)},
6534 };
6535 const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6536 for (int test_i = 0; test_i < testcount; test_i++) {
6537 const int *fillsize = testcases[test_i];
6538 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6539 fillsize[2], fillsize[3]);
6540 XML_Parser parser = XML_ParserCreate(NULL);
6541 assert_true(parser != NULL);
6542
6543 CharData storage;
6544 CharData_Init(&storage);
6545 XML_SetUserData(parser, &storage);
6546 XML_SetStartElementHandler(parser, start_element_event_handler);
6547
6548 g_bytesScanned = 0;
6549 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6550 int offset = 0;
6551 while (*fillsize >= 0) {
6552 assert_true(offset + *fillsize <= document_length); // or test is invalid
6553 const enum XML_Status status
6554 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6555 if (status != XML_STATUS_OK) {
6556 xml_failure(parser);
6557 }
6558 offset += *fillsize;
6559 fillsize++;
6560 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6561 worstcase_bytes += offset; // we might've tried to parse all pending bytes
6562 }
6563 assert_true(storage.count == 1); // the big token should've been parsed
6564 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6565 if (g_reparseDeferralEnabledDefault) {
6566 // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6567 const unsigned max_bytes_scanned = -*fillsize;
6568 if (g_bytesScanned > max_bytes_scanned) {
6569 fprintf(stderr,
6570 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6571 g_bytesScanned, max_bytes_scanned);
6572 fail("too many bytes scanned in parse attempts");
6573 }
6574 }
6575 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6576
6577 XML_ParserFree(parser);
6578 }
6579 free(document);
6580 }
6581 END_TEST
6582
START_TEST(test_empty_ext_param_entity_in_value)6583 START_TEST(test_empty_ext_param_entity_in_value) {
6584 const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>";
6585 ExtOption options[] = {
6586 {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">"
6587 "<!ENTITY ge \"%pe;\">"},
6588 {XCS("empty"), ""},
6589 {NULL, NULL},
6590 };
6591
6592 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6593 XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner);
6594 XML_SetUserData(g_parser, options);
6595 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6596 == XML_STATUS_ERROR)
6597 xml_failure(g_parser);
6598 }
6599 END_TEST
6600
/* Build the "basic tests" test case and attach it to suite `s`.
 *
 * The test case is created, added to the suite, given basic_setup() and
 * basic_teardown() as its checked fixture, and then populated with the
 * tests below in the order listed.
 *
 * Three registration entry points are used:
 *   - tcase_add_test()                 plain registration
 *   - tcase_add_test__ifdef_xml_dtd()  defined outside this chunk;
 *                                      presumably registers the test only in
 *                                      builds with XML_DTD -- confirm
 *   - tcase_add_test__if_xml_ge()      defined outside this chunk;
 *                                      presumably registers the test only in
 *                                      builds with general entity support
 *                                      (XML_GE) -- confirm
 */
void
make_basic_test_case(Suite *s) {
  TCase *tc_basic = tcase_create("basic tests");

  suite_add_tcase(s, tc_basic);
  tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);

  /* Test registrations follow; keep them in this order. */
  tcase_add_test(tc_basic, test_nul_byte);
  tcase_add_test(tc_basic, test_u0000_char);
  tcase_add_test(tc_basic, test_siphash_self);
  tcase_add_test(tc_basic, test_siphash_spec);
  tcase_add_test(tc_basic, test_bom_utf8);
  tcase_add_test(tc_basic, test_bom_utf16_be);
  tcase_add_test(tc_basic, test_bom_utf16_le);
  tcase_add_test(tc_basic, test_nobom_utf16_le);
  tcase_add_test(tc_basic, test_hash_collision);
  tcase_add_test(tc_basic, test_hash_salt_setter);
  tcase_add_test(tc_basic, test_illegal_utf8);
  tcase_add_test(tc_basic, test_utf8_auto_align);
  tcase_add_test(tc_basic, test_utf16);
  tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
  tcase_add_test(tc_basic, test_not_utf16);
  tcase_add_test(tc_basic, test_bad_encoding);
  tcase_add_test(tc_basic, test_latin1_umlauts);
  tcase_add_test(tc_basic, test_long_utf8_character);
  tcase_add_test(tc_basic, test_long_latin1_attribute);
  tcase_add_test(tc_basic, test_long_ascii_attribute);
  /* Regression test for SF bug #491986. */
  tcase_add_test(tc_basic, test_danish_latin1);
  /* Regression test for SF bug #514281. */
  tcase_add_test(tc_basic, test_french_charref_hexidecimal);
  tcase_add_test(tc_basic, test_french_charref_decimal);
  tcase_add_test(tc_basic, test_french_latin1);
  tcase_add_test(tc_basic, test_french_utf8);
  tcase_add_test(tc_basic, test_utf8_false_rejection);
  tcase_add_test(tc_basic, test_line_number_after_parse);
  tcase_add_test(tc_basic, test_column_number_after_parse);
  tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
  tcase_add_test(tc_basic, test_line_number_after_error);
  tcase_add_test(tc_basic, test_column_number_after_error);
  tcase_add_test(tc_basic, test_really_long_lines);
  tcase_add_test(tc_basic, test_really_long_encoded_lines);
  tcase_add_test(tc_basic, test_end_element_events);
  tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
  tcase_add_test(tc_basic, test_attr_whitespace_normalization);
  tcase_add_test(tc_basic, test_xmldecl_misplaced);
  tcase_add_test(tc_basic, test_xmldecl_invalid);
  tcase_add_test(tc_basic, test_xmldecl_missing_attr);
  tcase_add_test(tc_basic, test_xmldecl_missing_value);
  tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
  tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
  tcase_add_test(tc_basic,
                 test_wfc_undeclared_entity_with_external_subset_standalone);
  tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
  tcase_add_test(tc_basic, test_not_standalone_handler_reject);
  tcase_add_test(tc_basic, test_not_standalone_handler_accept);
  tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
  tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
  tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
  tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
  tcase_add_test(tc_basic, test_dtd_attr_handling);
  tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
  tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
  tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_good_cdata_ascii);
  tcase_add_test(tc_basic, test_good_cdata_utf16);
  tcase_add_test(tc_basic, test_good_cdata_utf16_le);
  tcase_add_test(tc_basic, test_long_cdata_utf16);
  tcase_add_test(tc_basic, test_multichar_cdata_utf16);
  tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
  tcase_add_test(tc_basic, test_bad_cdata);
  tcase_add_test(tc_basic, test_bad_cdata_utf16);
  tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_memory_allocation);
  tcase_add_test__if_xml_ge(tc_basic, test_default_current);
  tcase_add_test(tc_basic, test_dtd_elements);
  tcase_add_test(tc_basic, test_dtd_elements_nesting);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_foreign_dtd_without_external_subset);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
  tcase_add_test(tc_basic, test_set_base);
  tcase_add_test(tc_basic, test_attributes);
  tcase_add_test(tc_basic, test_duplicate_cdata_attribute);
  tcase_add_test(tc_basic, test_duplicate_id_attribute_1);
  tcase_add_test(tc_basic, test_duplicate_id_attribute_2);
  tcase_add_test(tc_basic, test_duplicate_cdata_attribute_multiple_attlistdecl);
  tcase_add_test(tc_basic,
                 test_duplicate_cdata_attribute_multiple_attlistdecl_2);
  tcase_add_test(tc_basic,
                 test_duplicate_cdata_attribute_multiple_attlistdecl_3);
  tcase_add_test(tc_basic, test_duplicate_id_attribute_multiple_attlistdecl);
  tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
  tcase_add_test(tc_basic, test_resume_invalid_parse);
  tcase_add_test(tc_basic, test_resume_resuspended);
  tcase_add_test(tc_basic, test_cdata_default);
  tcase_add_test(tc_basic, test_subordinate_reset);
  tcase_add_test(tc_basic, test_subordinate_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_ext_entity_invalid_suspended_parse);
  tcase_add_test(tc_basic, test_explicit_encoding);
  tcase_add_test(tc_basic, test_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
  tcase_add_test(tc_basic, test_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
  tcase_add_test(tc_basic, test_empty_parse);
  tcase_add_test(tc_basic, test_negative_len_parse);
  tcase_add_test(tc_basic, test_negative_len_parse_buffer);
  tcase_add_test(tc_basic, test_get_buffer_1);
  tcase_add_test(tc_basic, test_get_buffer_2);
  /* This test is only registered when XML_CONTEXT_BYTES is positive. */
#if XML_CONTEXT_BYTES > 0
  tcase_add_test(tc_basic, test_get_buffer_3_overflow);
#endif
  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
  tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
  tcase_add_test(tc_basic, test_byte_info_at_end);
  tcase_add_test(tc_basic, test_byte_info_at_error);
  tcase_add_test(tc_basic, test_byte_info_at_cdata);
  tcase_add_test(tc_basic, test_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
  tcase_add_test(tc_basic, test_not_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
  tcase_add_test(tc_basic, test_bad_public_doctype);
  tcase_add_test(tc_basic, test_attribute_enum_value);
  tcase_add_test(tc_basic, test_predefined_entity_redefinition);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
  tcase_add_test(tc_basic, test_public_notation_no_sysid);
  tcase_add_test(tc_basic, test_nested_groups);
  tcase_add_test(tc_basic, test_group_choice);
  tcase_add_test(tc_basic, test_standalone_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity_2);
  tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
  tcase_add_test(tc_basic, test_suspend_xdecl);
  tcase_add_test(tc_basic, test_abort_epilog);
  tcase_add_test(tc_basic, test_abort_epilog_2);
  tcase_add_test(tc_basic, test_suspend_epilog);
  tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
  tcase_add_test(tc_basic, test_unfinished_epilog);
  tcase_add_test(tc_basic, test_partial_char_in_epilog);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_suspend_resume_internal_entity_issue_629);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
  tcase_add_test(tc_basic, test_restart_on_error);
  tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
  tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
  tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
  tcase_add_test(tc_basic, test_standalone_internal_entity);
  tcase_add_test(tc_basic, test_skipped_external_entity);
  tcase_add_test__ifdef_xml_dtd(
      tc_basic, test_scaff_index_shared_across_external_entity_parser);
  tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
  tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
  tcase_add_test(tc_basic, test_pi_handled_in_default);
  tcase_add_test(tc_basic, test_comment_handled_in_default);
  tcase_add_test(tc_basic, test_pi_yml);
  tcase_add_test(tc_basic, test_pi_xnl);
  tcase_add_test(tc_basic, test_pi_xmm);
  tcase_add_test(tc_basic, test_utf16_pi);
  tcase_add_test(tc_basic, test_utf16_be_pi);
  tcase_add_test(tc_basic, test_utf16_be_comment);
  tcase_add_test(tc_basic, test_utf16_le_comment);
  tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_unknown_encoding_success);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
  tcase_add_test(tc_basic, test_invalid_unknown_encoding);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
  tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary);
  tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
  tcase_add_test(tc_basic, test_utf8_in_start_tags);
  tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
  tcase_add_test(tc_basic, test_utf16_attribute);
  tcase_add_test(tc_basic, test_utf16_second_attr);
  tcase_add_test(tc_basic, test_attr_after_solidus);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
  tcase_add_test(tc_basic, test_bad_doctype);
  tcase_add_test(tc_basic, test_bad_doctype_utf8);
  tcase_add_test(tc_basic, test_bad_doctype_utf16);
  tcase_add_test(tc_basic, test_bad_doctype_plus);
  tcase_add_test(tc_basic, test_bad_doctype_star);
  tcase_add_test(tc_basic, test_bad_doctype_query);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
  tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
  tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
  tcase_add_test(tc_basic, test_short_doctype);
  tcase_add_test(tc_basic, test_short_doctype_2);
  tcase_add_test(tc_basic, test_short_doctype_3);
  tcase_add_test(tc_basic, test_long_doctype);
  tcase_add_test(tc_basic, test_bad_entity);
  tcase_add_test(tc_basic, test_bad_entity_2);
  tcase_add_test(tc_basic, test_bad_entity_3);
  tcase_add_test(tc_basic, test_bad_entity_4);
  tcase_add_test(tc_basic, test_bad_notation);
  tcase_add_test(tc_basic, test_default_doctype_handler);
  tcase_add_test(tc_basic, test_empty_element_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_pool_integrity_with_unfinished_attr);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value);
  tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
  tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
  tcase_add_test__if_xml_ge(tc_basic,
                            test_deep_nested_entity_delayed_interpretation);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
  /* Tests around big tokens, reparse deferral and buffer fill handling. */
  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
  tcase_add_test(tc_basic, test_set_reparse_deferral);
  tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
  tcase_add_test(tc_basic, test_set_bad_reparse_option);
  tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
  tcase_add_test(tc_basic, test_varying_buffer_fills);
}
6877