1 /* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23 Licensed under the MIT license:
24
25 Permission is hereby granted, free of charge, to any person obtaining
26 a copy of this software and associated documentation files (the
27 "Software"), to deal in the Software without restriction, including
28 without limitation the rights to use, copy, modify, merge, publish,
29 distribute, sublicense, and/or sell copies of the Software, and to permit
30 persons to whom the Software is furnished to do so, subject to the
31 following conditions:
32
33 The above copyright notice and this permission notice shall be included
34 in all copies or substantial portions of the Software.
35
36 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
37 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
38 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
39 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
40 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
41 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
42 USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44
45 #if defined(NDEBUG)
46 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
47 #endif
48
49 #include <assert.h>
50
51 #include <stdio.h>
52 #include <string.h>
53 #include <time.h>
54
55 #if ! defined(__cplusplus)
56 # include <stdbool.h>
57 #endif
58
59 #include "expat_config.h"
60
61 #include "expat.h"
62 #include "internal.h"
63 #include "minicheck.h"
64 #include "structdata.h"
65 #include "common.h"
66 #include "dummy.h"
67 #include "handlers.h"
68 #include "siphash.h"
69 #include "basic_tests.h"
70
71 static void
basic_setup(void)72 basic_setup(void) {
73 g_parser = XML_ParserCreate(NULL);
74 if (g_parser == NULL)
75 fail("Parser not created.");
76 }
77
78 /*
79 * Character & encoding tests.
80 */
81
START_TEST(test_nul_byte)82 START_TEST(test_nul_byte) {
83 char text[] = "<doc>\0</doc>";
84
85 /* test that a NUL byte (in US-ASCII data) is an error */
86 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
87 == XML_STATUS_OK)
88 fail("Parser did not report error on NUL-byte.");
89 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
90 xml_failure(g_parser);
91 }
92 END_TEST
93
START_TEST(test_u0000_char)94 START_TEST(test_u0000_char) {
95 /* test that a NUL byte (in US-ASCII data) is an error */
96 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF,
97 "Parser did not report error on NUL-byte.");
98 }
99 END_TEST
100
START_TEST(test_siphash_self)101 START_TEST(test_siphash_self) {
102 if (! sip24_valid())
103 fail("SipHash self-test failed");
104 }
105 END_TEST
106
START_TEST(test_siphash_spec)107 START_TEST(test_siphash_spec) {
108 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
109 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
110 "\x0a\x0b\x0c\x0d\x0e";
111 const size_t len = sizeof(message) - 1;
112 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
113 struct siphash state;
114 struct sipkey key;
115
116 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
117 "\x0a\x0b\x0c\x0d\x0e\x0f");
118 sip24_init(&state, &key);
119
120 /* Cover spread across calls */
121 sip24_update(&state, message, 4);
122 sip24_update(&state, message + 4, len - 4);
123
124 /* Cover null length */
125 sip24_update(&state, message, 0);
126
127 if (sip24_final(&state) != expected)
128 fail("sip24_final failed spec test\n");
129
130 /* Cover wrapper */
131 if (siphash24(message, len, &key) != expected)
132 fail("siphash24 failed spec test\n");
133 }
134 END_TEST
135
START_TEST(test_bom_utf8)136 START_TEST(test_bom_utf8) {
137 /* This test is really just making sure we don't core on a UTF-8 BOM. */
138 const char *text = "\357\273\277<e/>";
139
140 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
141 == XML_STATUS_ERROR)
142 xml_failure(g_parser);
143 }
144 END_TEST
145
START_TEST(test_bom_utf16_be)146 START_TEST(test_bom_utf16_be) {
147 char text[] = "\376\377\0<\0e\0/\0>";
148
149 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
150 == XML_STATUS_ERROR)
151 xml_failure(g_parser);
152 }
153 END_TEST
154
START_TEST(test_bom_utf16_le)155 START_TEST(test_bom_utf16_le) {
156 char text[] = "\377\376<\0e\0/\0>\0";
157
158 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
159 == XML_STATUS_ERROR)
160 xml_failure(g_parser);
161 }
162 END_TEST
163
START_TEST(test_nobom_utf16_le)164 START_TEST(test_nobom_utf16_le) {
165 char text[] = " \0<\0e\0/\0>\0";
166
167 if (g_chunkSize == 1) {
168 // TODO: with just the first byte, we can't tell the difference between
169 // UTF-16-LE and UTF-8. Avoid the failure for now.
170 return;
171 }
172
173 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
174 == XML_STATUS_ERROR)
175 xml_failure(g_parser);
176 }
177 END_TEST
178
START_TEST(test_hash_collision)179 START_TEST(test_hash_collision) {
180 /* For full coverage of the lookup routine, we need to ensure a
181 * hash collision even though we can only tell that we have one
182 * through breakpoint debugging or coverage statistics. The
183 * following will cause a hash collision on machines with a 64-bit
184 * long type; others will have to experiment. The full coverage
185 * tests invoked from qa.sh usually provide a hash collision, but
186 * not always. This is an attempt to provide insurance.
187 */
188 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
189 const char *text
190 = "<doc>\n"
191 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
192 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
193 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
194 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
195 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
196 "<d8>This triggers the table growth and collides with b2</d8>\n"
197 "</doc>\n";
198
199 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
200 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
201 == XML_STATUS_ERROR)
202 xml_failure(g_parser);
203 }
204 END_TEST
205 #undef COLLIDING_HASH_SALT
206
207 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)208 START_TEST(test_danish_latin1) {
209 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
210 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
211 #ifdef XML_UNICODE
212 const XML_Char *expected
213 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
214 #else
215 const XML_Char *expected
216 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
217 #endif
218 run_character_check(text, expected);
219 }
220 END_TEST
221
222 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)223 START_TEST(test_french_charref_hexidecimal) {
224 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
225 "<doc>éèàçêÈ</doc>";
226 #ifdef XML_UNICODE
227 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
228 #else
229 const XML_Char *expected
230 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
231 #endif
232 run_character_check(text, expected);
233 }
234 END_TEST
235
START_TEST(test_french_charref_decimal)236 START_TEST(test_french_charref_decimal) {
237 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
238 "<doc>éèàçêÈ</doc>";
239 #ifdef XML_UNICODE
240 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
241 #else
242 const XML_Char *expected
243 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
244 #endif
245 run_character_check(text, expected);
246 }
247 END_TEST
248
START_TEST(test_french_latin1)249 START_TEST(test_french_latin1) {
250 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
251 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
252 #ifdef XML_UNICODE
253 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
254 #else
255 const XML_Char *expected
256 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
257 #endif
258 run_character_check(text, expected);
259 }
260 END_TEST
261
START_TEST(test_french_utf8)262 START_TEST(test_french_utf8) {
263 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
264 "<doc>\xC3\xA9</doc>";
265 #ifdef XML_UNICODE
266 const XML_Char *expected = XCS("\x00e9");
267 #else
268 const XML_Char *expected = XCS("\xC3\xA9");
269 #endif
270 run_character_check(text, expected);
271 }
272 END_TEST
273
274 /* Regression test for SF bug #600479.
275 XXX There should be a test that exercises all legal XML Unicode
276 characters as PCDATA and attribute value content, and XML Name
277 characters as part of element and attribute names.
278 */
START_TEST(test_utf8_false_rejection)279 START_TEST(test_utf8_false_rejection) {
280 const char *text = "<doc>\xEF\xBA\xBF</doc>";
281 #ifdef XML_UNICODE
282 const XML_Char *expected = XCS("\xfebf");
283 #else
284 const XML_Char *expected = XCS("\xEF\xBA\xBF");
285 #endif
286 run_character_check(text, expected);
287 }
288 END_TEST
289
290 /* Regression test for SF bug #477667.
291 This test assures that any 8-bit character followed by a 7-bit
292 character will not be mistakenly interpreted as a valid UTF-8
293 sequence.
294 */
START_TEST(test_illegal_utf8)295 START_TEST(test_illegal_utf8) {
296 char text[100];
297 int i;
298
299 for (i = 128; i <= 255; ++i) {
300 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
301 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
302 == XML_STATUS_OK) {
303 snprintf(text, sizeof(text),
304 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
305 i);
306 fail(text);
307 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
308 xml_failure(g_parser);
309 /* Reset the parser since we use the same parser repeatedly. */
310 XML_ParserReset(g_parser, NULL);
311 }
312 }
313 END_TEST
314
315 /* Examples, not masks: */
316 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
317 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
318 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
319 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
320 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
321
START_TEST(test_utf8_auto_align)322 START_TEST(test_utf8_auto_align) {
323 struct TestCase {
324 ptrdiff_t expectedMovementInChars;
325 const char *input;
326 };
327
328 struct TestCase cases[] = {
329 {00, ""},
330
331 {00, UTF8_LEAD_1},
332
333 {-1, UTF8_LEAD_2},
334 {00, UTF8_LEAD_2 UTF8_FOLLOW},
335
336 {-1, UTF8_LEAD_3},
337 {-2, UTF8_LEAD_3 UTF8_FOLLOW},
338 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
339
340 {-1, UTF8_LEAD_4},
341 {-2, UTF8_LEAD_4 UTF8_FOLLOW},
342 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
343 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
344 };
345
346 size_t i = 0;
347 bool success = true;
348 for (; i < sizeof(cases) / sizeof(*cases); i++) {
349 const char *fromLim = cases[i].input + strlen(cases[i].input);
350 const char *const fromLimInitially = fromLim;
351 ptrdiff_t actualMovementInChars;
352
353 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
354
355 actualMovementInChars = (fromLim - fromLimInitially);
356 if (actualMovementInChars != cases[i].expectedMovementInChars) {
357 size_t j = 0;
358 success = false;
359 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
360 ", actually moved by %2d chars: \"",
361 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
362 (int)actualMovementInChars);
363 for (; j < strlen(cases[i].input); j++) {
364 printf("\\x%02x", (unsigned char)cases[i].input[j]);
365 }
366 printf("\"\n");
367 }
368 }
369
370 if (! success) {
371 fail("UTF-8 auto-alignment is not bullet-proof\n");
372 }
373 }
374 END_TEST
375
START_TEST(test_utf16)376 START_TEST(test_utf16) {
377 /* <?xml version="1.0" encoding="UTF-16"?>
378 * <doc a='123'>some {A} text</doc>
379 *
380 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
381 */
382 char text[]
383 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
384 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
385 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
386 "\000'\000?\000>\000\n"
387 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
388 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
389 "<\000/\000d\000o\000c\000>";
390 #ifdef XML_UNICODE
391 const XML_Char *expected = XCS("some \xff21 text");
392 #else
393 const XML_Char *expected = XCS("some \357\274\241 text");
394 #endif
395 CharData storage;
396
397 CharData_Init(&storage);
398 XML_SetUserData(g_parser, &storage);
399 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
400 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
401 == XML_STATUS_ERROR)
402 xml_failure(g_parser);
403 CharData_CheckXMLChars(&storage, expected);
404 }
405 END_TEST
406
START_TEST(test_utf16_le_epilog_newline)407 START_TEST(test_utf16_le_epilog_newline) {
408 unsigned int first_chunk_bytes = 17;
409 char text[] = "\xFF\xFE" /* BOM */
410 "<\000e\000/\000>\000" /* document element */
411 "\r\000\n\000\r\000\n\000"; /* epilog */
412
413 if (first_chunk_bytes >= sizeof(text) - 1)
414 fail("bad value of first_chunk_bytes");
415 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE)
416 == XML_STATUS_ERROR)
417 xml_failure(g_parser);
418 else {
419 enum XML_Status rc;
420 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
421 (int)(sizeof(text) - first_chunk_bytes - 1),
422 XML_TRUE);
423 if (rc == XML_STATUS_ERROR)
424 xml_failure(g_parser);
425 }
426 }
427 END_TEST
428
429 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)430 START_TEST(test_not_utf16) {
431 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
432 "<doc>Hi</doc>";
433
434 /* Use a handler to provoke the appropriate code paths */
435 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
436 expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
437 "UTF-16 declared in UTF-8 not faulted");
438 }
439 END_TEST
440
441 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)442 START_TEST(test_bad_encoding) {
443 const char *text = "<doc>Hi</doc>";
444
445 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
446 fail("XML_SetEncoding failed");
447 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
448 "Unknown encoding not faulted");
449 }
450 END_TEST
451
452 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)453 START_TEST(test_latin1_umlauts) {
454 const char *text
455 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
456 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n"
457 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>";
458 #ifdef XML_UNICODE
459 /* Expected results in UTF-16 */
460 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
461 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
462 #else
463 /* Expected results in UTF-8 */
464 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
465 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
466 #endif
467
468 run_character_check(text, expected);
469 XML_ParserReset(g_parser, NULL);
470 run_attribute_check(text, expected);
471 /* Repeat with a default handler */
472 XML_ParserReset(g_parser, NULL);
473 XML_SetDefaultHandler(g_parser, dummy_default_handler);
474 run_character_check(text, expected);
475 XML_ParserReset(g_parser, NULL);
476 XML_SetDefaultHandler(g_parser, dummy_default_handler);
477 run_attribute_check(text, expected);
478 }
479 END_TEST
480
481 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)482 START_TEST(test_long_utf8_character) {
483 const char *text
484 = "<?xml version='1.0' encoding='utf-8'?>\n"
485 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
486 "<do\xf0\x90\x80\x80/>";
487 expect_failure(text, XML_ERROR_INVALID_TOKEN,
488 "4-byte UTF-8 character in element name not faulted");
489 }
490 END_TEST
491
492 /* Test that a long latin-1 attribute (too long to convert in one go)
493 * is correctly converted
494 */
START_TEST(test_long_latin1_attribute)495 START_TEST(test_long_latin1_attribute) {
496 const char *text
497 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
498 "<doc att='"
499 /* 64 characters per line */
500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
515 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
516 /* Last character splits across a buffer boundary */
517 "\xe4'>\n</doc>";
518
519 const XML_Char *expected =
520 /* 64 characters per line */
521 /* clang-format off */
522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
537 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
538 /* clang-format on */
539 #ifdef XML_UNICODE
540 XCS("\x00e4");
541 #else
542 XCS("\xc3\xa4");
543 #endif
544
545 run_attribute_check(text, expected);
546 }
547 END_TEST
548
549 /* Test that a long ASCII attribute (too long to convert in one go)
550 * is correctly converted
551 */
START_TEST(test_long_ascii_attribute)552 START_TEST(test_long_ascii_attribute) {
553 const char *text
554 = "<?xml version='1.0' encoding='us-ascii'?>\n"
555 "<doc att='"
556 /* 64 characters per line */
557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
573 "01234'>\n</doc>";
574 const XML_Char *expected =
575 /* 64 characters per line */
576 /* clang-format off */
577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
593 XCS("01234");
594 /* clang-format on */
595
596 run_attribute_check(text, expected);
597 }
598 END_TEST
599
600 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)601 START_TEST(test_line_number_after_parse) {
602 const char *text = "<tag>\n"
603 "\n"
604 "\n</tag>";
605 XML_Size lineno;
606
607 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
608 == XML_STATUS_ERROR)
609 xml_failure(g_parser);
610 lineno = XML_GetCurrentLineNumber(g_parser);
611 if (lineno != 4) {
612 char buffer[100];
613 snprintf(buffer, sizeof(buffer),
614 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
615 fail(buffer);
616 }
617 }
618 END_TEST
619
620 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)621 START_TEST(test_column_number_after_parse) {
622 const char *text = "<tag></tag>";
623 XML_Size colno;
624
625 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
626 == XML_STATUS_ERROR)
627 xml_failure(g_parser);
628 colno = XML_GetCurrentColumnNumber(g_parser);
629 if (colno != 11) {
630 char buffer[100];
631 snprintf(buffer, sizeof(buffer),
632 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
633 fail(buffer);
634 }
635 }
636 END_TEST
637
638 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)639 START_TEST(test_line_and_column_numbers_inside_handlers) {
640 const char *text = "<a>\n" /* Unix end-of-line */
641 " <b>\r\n" /* Windows end-of-line */
642 " <c/>\r" /* Mac OS end-of-line */
643 " </b>\n"
644 " <d>\n"
645 " <f/>\n"
646 " </d>\n"
647 "</a>";
648 const StructDataEntry expected[]
649 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
650 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
651 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
652 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
653 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
654 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
655 StructData storage;
656
657 StructData_Init(&storage);
658 XML_SetUserData(g_parser, &storage);
659 XML_SetStartElementHandler(g_parser, start_element_event_handler2);
660 XML_SetEndElementHandler(g_parser, end_element_event_handler2);
661 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
662 == XML_STATUS_ERROR)
663 xml_failure(g_parser);
664
665 StructData_CheckItems(&storage, expected, expected_count);
666 StructData_Dispose(&storage);
667 }
668 END_TEST
669
670 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)671 START_TEST(test_line_number_after_error) {
672 const char *text = "<a>\n"
673 " <b>\n"
674 " </a>"; /* missing </b> */
675 XML_Size lineno;
676 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
677 != XML_STATUS_ERROR)
678 fail("Expected a parse error");
679
680 lineno = XML_GetCurrentLineNumber(g_parser);
681 if (lineno != 3) {
682 char buffer[100];
683 snprintf(buffer, sizeof(buffer),
684 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
685 fail(buffer);
686 }
687 }
688 END_TEST
689
690 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)691 START_TEST(test_column_number_after_error) {
692 const char *text = "<a>\n"
693 " <b>\n"
694 " </a>"; /* missing </b> */
695 XML_Size colno;
696 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
697 != XML_STATUS_ERROR)
698 fail("Expected a parse error");
699
700 colno = XML_GetCurrentColumnNumber(g_parser);
701 if (colno != 4) {
702 char buffer[100];
703 snprintf(buffer, sizeof(buffer),
704 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
705 fail(buffer);
706 }
707 }
708 END_TEST
709
710 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)711 START_TEST(test_really_long_lines) {
712 /* This parses an input line longer than INIT_DATA_BUF_SIZE
713 characters long (defined to be 1024 in xmlparse.c). We take a
714 really cheesy approach to building the input buffer, because
715 this avoids writing bugs in buffer-filling code.
716 */
717 const char *text
718 = "<e>"
719 /* 64 chars */
720 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
721 /* until we have at least 1024 characters on the line: */
722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
738 "</e>";
739 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
740 == XML_STATUS_ERROR)
741 xml_failure(g_parser);
742 }
743 END_TEST
744
745 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)746 START_TEST(test_really_long_encoded_lines) {
747 /* As above, except that we want to provoke an output buffer
748 * overflow with a non-trivial encoding. For this we need to pass
749 * the whole cdata in one go, not byte-by-byte.
750 */
751 void *buffer;
752 const char *text
753 = "<?xml version='1.0' encoding='iso-8859-1'?>"
754 "<e>"
755 /* 64 chars */
756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757 /* until we have at least 1024 characters on the line: */
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
774 "</e>";
775 int parse_len = (int)strlen(text);
776
777 /* Need a cdata handler to provoke the code path we want to test */
778 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
779 buffer = XML_GetBuffer(g_parser, parse_len);
780 if (buffer == NULL)
781 fail("Could not allocate parse buffer");
782 assert(buffer != NULL);
783 memcpy(buffer, text, parse_len);
784 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
785 xml_failure(g_parser);
786 }
787 END_TEST
788
789 /*
790 * Element event tests.
791 */
792
START_TEST(test_end_element_events)793 START_TEST(test_end_element_events) {
794 const char *text = "<a><b><c/></b><d><f/></d></a>";
795 const XML_Char *expected = XCS("/c/b/f/d/a");
796 CharData storage;
797
798 CharData_Init(&storage);
799 XML_SetUserData(g_parser, &storage);
800 XML_SetEndElementHandler(g_parser, end_element_event_handler);
801 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
802 == XML_STATUS_ERROR)
803 xml_failure(g_parser);
804 CharData_CheckXMLChars(&storage, expected);
805 }
806 END_TEST
807
808 /*
809 * Attribute tests.
810 */
811
812 /* Helper used by the following tests; this checks any "attr" and "refs"
813 attributes to make sure whitespace has been normalized.
814
815 Return true if whitespace has been normalized in a string, using
816 the rules for attribute value normalization. The 'is_cdata' flag
817 is needed since CDATA attributes don't need to have multiple
818 whitespace characters collapsed to a single space, while other
819 attribute data types do. (Section 3.3.3 of the recommendation.)
820 */
821 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)822 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
823 int blanks = 0;
824 int at_start = 1;
825 while (*s) {
826 if (*s == XCS(' '))
827 ++blanks;
828 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
829 return 0;
830 else {
831 if (at_start) {
832 at_start = 0;
833 if (blanks && ! is_cdata)
834 /* illegal leading blanks */
835 return 0;
836 } else if (blanks > 1 && ! is_cdata)
837 return 0;
838 blanks = 0;
839 }
840 ++s;
841 }
842 if (blanks && ! is_cdata)
843 return 0;
844 return 1;
845 }
846
847 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)848 START_TEST(test_helper_is_whitespace_normalized) {
849 assert(is_whitespace_normalized(XCS("abc"), 0));
850 assert(is_whitespace_normalized(XCS("abc"), 1));
851 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
852 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
853 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
854 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
855 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
856 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
857 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
858 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
859 assert(! is_whitespace_normalized(XCS(" "), 0));
860 assert(is_whitespace_normalized(XCS(" "), 1));
861 assert(! is_whitespace_normalized(XCS("\t"), 0));
862 assert(! is_whitespace_normalized(XCS("\t"), 1));
863 assert(! is_whitespace_normalized(XCS("\n"), 0));
864 assert(! is_whitespace_normalized(XCS("\n"), 1));
865 assert(! is_whitespace_normalized(XCS("\r"), 0));
866 assert(! is_whitespace_normalized(XCS("\r"), 1));
867 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
868 }
869 END_TEST
870
871 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)872 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
873 const XML_Char **atts) {
874 int i;
875 UNUSED_P(userData);
876 UNUSED_P(name);
877 for (i = 0; atts[i] != NULL; i += 2) {
878 const XML_Char *attrname = atts[i];
879 const XML_Char *value = atts[i + 1];
880 if (xcstrcmp(XCS("attr"), attrname) == 0
881 || xcstrcmp(XCS("ents"), attrname) == 0
882 || xcstrcmp(XCS("refs"), attrname) == 0) {
883 if (! is_whitespace_normalized(value, 0)) {
884 char buffer[256];
885 snprintf(buffer, sizeof(buffer),
886 "attribute value not normalized: %" XML_FMT_STR
887 "='%" XML_FMT_STR "'",
888 attrname, value);
889 fail(buffer);
890 }
891 }
892 }
893 }
894
START_TEST(test_attr_whitespace_normalization)895 START_TEST(test_attr_whitespace_normalization) {
896 const char *text
897 = "<!DOCTYPE doc [\n"
898 " <!ATTLIST doc\n"
899 " attr NMTOKENS #REQUIRED\n"
900 " ents ENTITIES #REQUIRED\n"
901 " refs IDREFS #REQUIRED>\n"
902 "]>\n"
903 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
904 " ents=' ent-1 \t\r\n"
905 " ent-2 ' >\n"
906 " <e id='id-1'/>\n"
907 " <e id='id-2'/>\n"
908 "</doc>";
909
910 XML_SetStartElementHandler(g_parser,
911 check_attr_contains_normalized_whitespace);
912 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
913 == XML_STATUS_ERROR)
914 xml_failure(g_parser);
915 }
916 END_TEST
917
918 /*
919 * XML declaration tests.
920 */
921
START_TEST(test_xmldecl_misplaced)922 START_TEST(test_xmldecl_misplaced) {
923 expect_failure("\n"
924 "<?xml version='1.0'?>\n"
925 "<a/>",
926 XML_ERROR_MISPLACED_XML_PI,
927 "failed to report misplaced XML declaration");
928 }
929 END_TEST
930
START_TEST(test_xmldecl_invalid)931 START_TEST(test_xmldecl_invalid) {
932 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
933 "Failed to report invalid XML declaration");
934 }
935 END_TEST
936
START_TEST(test_xmldecl_missing_attr)937 START_TEST(test_xmldecl_missing_attr) {
938 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
939 "Failed to report missing XML declaration attribute");
940 }
941 END_TEST
942
START_TEST(test_xmldecl_missing_value)943 START_TEST(test_xmldecl_missing_value) {
944 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
945 "<doc/>",
946 XML_ERROR_XML_DECL,
947 "Failed to report missing attribute value");
948 }
949 END_TEST
950
951 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)952 START_TEST(test_unknown_encoding_internal_entity) {
953 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
954 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
955 "<test a='&foo;'/>";
956
957 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
958 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
959 == XML_STATUS_ERROR)
960 xml_failure(g_parser);
961 }
962 END_TEST
963
964 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)965 START_TEST(test_unrecognised_encoding_internal_entity) {
966 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
967 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
968 "<test a='&foo;'/>";
969
970 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
971 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
972 != XML_STATUS_ERROR)
973 fail("Unrecognised encoding not rejected");
974 }
975 END_TEST
976
977 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)978 START_TEST(test_ext_entity_set_encoding) {
979 const char *text = "<!DOCTYPE doc [\n"
980 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
981 "]>\n"
982 "<doc>&en;</doc>";
983 ExtTest test_data
984 = {/* This text says it's an unsupported encoding, but it's really
985 UTF-8, which we tell Expat using XML_SetEncoding().
986 */
987 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
988 #ifdef XML_UNICODE
989 const XML_Char *expected = XCS("\x00e9");
990 #else
991 const XML_Char *expected = XCS("\xc3\xa9");
992 #endif
993
994 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
995 run_ext_character_check(text, &test_data, expected);
996 }
997 END_TEST
998
999 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)1000 START_TEST(test_ext_entity_no_handler) {
1001 const char *text = "<!DOCTYPE doc [\n"
1002 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1003 "]>\n"
1004 "<doc>&en;</doc>";
1005
1006 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1007 run_character_check(text, XCS(""));
1008 }
1009 END_TEST
1010
1011 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1012 START_TEST(test_ext_entity_set_bom) {
1013 const char *text = "<!DOCTYPE doc [\n"
1014 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1015 "]>\n"
1016 "<doc>&en;</doc>";
1017 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1018 "<?xml encoding='iso-8859-3'?>"
1019 "\xC3\xA9",
1020 XCS("utf-8"), NULL};
1021 #ifdef XML_UNICODE
1022 const XML_Char *expected = XCS("\x00e9");
1023 #else
1024 const XML_Char *expected = XCS("\xc3\xa9");
1025 #endif
1026
1027 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1028 run_ext_character_check(text, &test_data, expected);
1029 }
1030 END_TEST
1031
1032 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1033 START_TEST(test_ext_entity_bad_encoding) {
1034 const char *text = "<!DOCTYPE doc [\n"
1035 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1036 "]>\n"
1037 "<doc>&en;</doc>";
1038 ExtFaults fault
1039 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1040 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1041
1042 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1043 XML_SetUserData(g_parser, &fault);
1044 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1045 "Bad encoding should not have been accepted");
1046 }
1047 END_TEST
1048
1049 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1050 START_TEST(test_ext_entity_bad_encoding_2) {
1051 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1052 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1053 "<doc>&entity;</doc>";
1054 ExtFaults fault
1055 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1056 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1057
1058 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1059 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1060 XML_SetUserData(g_parser, &fault);
1061 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1062 "Bad encoding not faulted in external entity handler");
1063 }
1064 END_TEST
1065
1066 /* Test that no error is reported for unknown entities if we don't
1067 read an external subset. This was fixed in Expat 1.95.5.
1068 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1069 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1070 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1071 "<doc>&entity;</doc>";
1072
1073 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1074 == XML_STATUS_ERROR)
1075 xml_failure(g_parser);
1076 }
1077 END_TEST
1078
1079 /* Test that an error is reported for unknown entities if we don't
1080 have an external subset.
1081 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1082 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1083 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1084 "Parser did not report undefined entity w/out a DTD.");
1085 }
1086 END_TEST
1087
1088 /* Test that an error is reported for unknown entities if we don't
1089 read an external subset, but have been declared standalone.
1090 */
START_TEST(test_wfc_undeclared_entity_standalone)1091 START_TEST(test_wfc_undeclared_entity_standalone) {
1092 const char *text
1093 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1094 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1095 "<doc>&entity;</doc>";
1096
1097 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1098 "Parser did not report undefined entity (standalone).");
1099 }
1100 END_TEST
1101
1102 /* Test that an error is reported for unknown entities if we have read
1103 an external subset, and standalone is true.
1104 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1105 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1106 const char *text
1107 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1108 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1109 "<doc>&entity;</doc>";
1110 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1111
1112 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1113 XML_SetUserData(g_parser, &test_data);
1114 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1115 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1116 "Parser did not report undefined entity (external DTD).");
1117 }
1118 END_TEST
1119
1120 /* Test that external entity handling is not done if the parsing flag
1121 * is set to UNLESS_STANDALONE
1122 */
START_TEST(test_entity_with_external_subset_unless_standalone)1123 START_TEST(test_entity_with_external_subset_unless_standalone) {
1124 const char *text
1125 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1126 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1127 "<doc>&entity;</doc>";
1128 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1129
1130 XML_SetParamEntityParsing(g_parser,
1131 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1132 XML_SetUserData(g_parser, &test_data);
1133 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1134 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1135 "Parser did not report undefined entity");
1136 }
1137 END_TEST
1138
1139 /* Test that no error is reported for unknown entities if we have read
1140 an external subset, and standalone is false.
1141 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1142 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1143 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1144 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1145 "<doc>&entity;</doc>";
1146 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1147
1148 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1149 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1150 run_ext_character_check(text, &test_data, XCS(""));
1151 }
1152 END_TEST
1153
1154 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1155 START_TEST(test_not_standalone_handler_reject) {
1156 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1157 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1158 "<doc>&entity;</doc>";
1159 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1160
1161 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1162 XML_SetUserData(g_parser, &test_data);
1163 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1164 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1165 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1166 "NotStandalone handler failed to reject");
1167
1168 /* Try again but without external entity handling */
1169 XML_ParserReset(g_parser, NULL);
1170 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1171 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1172 "NotStandalone handler failed to reject");
1173 }
1174 END_TEST
1175
1176 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1177 START_TEST(test_not_standalone_handler_accept) {
1178 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1179 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1180 "<doc>&entity;</doc>";
1181 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1182
1183 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1184 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1185 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1186 run_ext_character_check(text, &test_data, XCS(""));
1187
1188 /* Repeat without the external entity handler */
1189 XML_ParserReset(g_parser, NULL);
1190 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1191 run_character_check(text, XCS(""));
1192 }
1193 END_TEST
1194
/* A reference to an internal general entity from inside a nested element
   (<t2> within <t1>) must parse successfully.  The test creates and frees
   its own parser instead of using g_parser. */
START_TEST(test_entity_start_tag_level_greater_than_one) {
  const char *const text = "<!DOCTYPE t1 [\n"
                           " <!ENTITY e1 'hello'>\n"
                           "]>\n"
                           "<t1>\n"
                           " <t2>&e1;</t2>\n"
                           "</t1>\n";

  XML_Parser parser = XML_ParserCreate(NULL);
  /* The whole document is fed as the final chunk and must be accepted. */
  assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
                                      /*isFinal*/ XML_TRUE)
              == XML_STATUS_OK);
  XML_ParserFree(parser);
}
END_TEST
1210
START_TEST(test_wfc_no_recursive_entity_refs)1211 START_TEST(test_wfc_no_recursive_entity_refs) {
1212 const char *text = "<!DOCTYPE doc [\n"
1213 " <!ENTITY entity '&entity;'>\n"
1214 "]>\n"
1215 "<doc>&entity;</doc>";
1216
1217 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1218 "Parser did not report recursive entity reference.");
1219 }
1220 END_TEST
1221
START_TEST(test_no_indirectly_recursive_entity_refs)1222 START_TEST(test_no_indirectly_recursive_entity_refs) {
1223 struct TestCase {
1224 const char *doc;
1225 bool usesParameterEntities;
1226 };
1227
1228 const struct TestCase cases[] = {
1229 // general entity + character data
1230 {"<!DOCTYPE a [\n"
1231 " <!ENTITY e1 '&e2;'>\n"
1232 " <!ENTITY e2 '&e1;'>\n"
1233 "]><a>&e2;</a>\n",
1234 false},
1235
1236 // general entity + attribute value
1237 {"<!DOCTYPE a [\n"
1238 " <!ENTITY e1 '&e2;'>\n"
1239 " <!ENTITY e2 '&e1;'>\n"
1240 "]><a k1='&e2;' />\n",
1241 false},
1242
1243 // parameter entity
1244 {"<!DOCTYPE doc [\n"
1245 " <!ENTITY % p1 '%p2;'>\n"
1246 " <!ENTITY % p2 '%p1;'>\n"
1247 " <!ENTITY % define_g \"<!ENTITY g '%p2;'>\">\n"
1248 " %define_g;\n"
1249 "]>\n"
1250 "<doc/>\n",
1251 true},
1252 };
1253 const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
1254
1255 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1256 for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
1257 j++) {
1258 const XML_Bool reset_wanted = reset_or_not[j];
1259 const char *const doc = cases[i].doc;
1260 const bool usesParameterEntities = cases[i].usesParameterEntities;
1261
1262 set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
1263
1264 #ifdef XML_DTD // both GE and DTD
1265 const bool rejection_expected = true;
1266 #elif XML_GE == 1 // GE but not DTD
1267 const bool rejection_expected = ! usesParameterEntities;
1268 #else // neither DTD nor GE
1269 const bool rejection_expected = false;
1270 #endif
1271
1272 XML_Parser parser = XML_ParserCreate(NULL);
1273
1274 #ifdef XML_DTD
1275 if (usesParameterEntities) {
1276 assert_true(
1277 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
1278 == 1);
1279 }
1280 #else
1281 UNUSED_P(usesParameterEntities);
1282 #endif // XML_DTD
1283
1284 const enum XML_Status status
1285 = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
1286 /*isFinal*/ XML_TRUE);
1287
1288 if (rejection_expected) {
1289 assert_true(status == XML_STATUS_ERROR);
1290 assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
1291 } else {
1292 assert_true(status == XML_STATUS_OK);
1293 }
1294
1295 if (reset_wanted) {
1296 // This covers free'ing of (eventually) all three open entity lists by
1297 // XML_ParserReset.
1298 XML_ParserReset(parser, NULL);
1299 }
1300
1301 // This covers free'ing of (eventually) all three open entity lists by
1302 // XML_ParserFree (unless XML_ParserReset has already done that above).
1303 XML_ParserFree(parser);
1304 }
1305 }
1306 }
1307 END_TEST
1308
START_TEST(test_recursive_external_parameter_entity_2)1309 START_TEST(test_recursive_external_parameter_entity_2) {
1310 struct TestCase {
1311 const char *doc;
1312 enum XML_Status expectedStatus;
1313 };
1314
1315 struct TestCase cases[] = {
1316 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1317 {"<!ENTITY % p1 '%p1;'>"
1318 "<!ENTITY % p1 'first declaration wins'>",
1319 XML_STATUS_ERROR},
1320 {"<!ENTITY % p1 'first declaration wins'>"
1321 "<!ENTITY % p1 '%p1;'>",
1322 XML_STATUS_OK},
1323 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK},
1324 };
1325
1326 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1327 const char *const doc = cases[i].doc;
1328 const enum XML_Status expectedStatus = cases[i].expectedStatus;
1329 set_subtest("%s", doc);
1330
1331 XML_Parser parser = XML_ParserCreate(NULL);
1332 assert_true(parser != NULL);
1333
1334 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1335 assert_true(ext_parser != NULL);
1336
1337 const enum XML_Status actualStatus
1338 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1339
1340 assert_true(actualStatus == expectedStatus);
1341 if (actualStatus != XML_STATUS_OK) {
1342 assert_true(XML_GetErrorCode(ext_parser)
1343 == XML_ERROR_RECURSIVE_ENTITY_REF);
1344 }
1345
1346 XML_ParserFree(ext_parser);
1347 XML_ParserFree(parser);
1348 }
1349 }
1350 END_TEST
1351
1352 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1353 START_TEST(test_ext_entity_invalid_parse) {
1354 const char *text = "<!DOCTYPE doc [\n"
1355 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1356 "]>\n"
1357 "<doc>&en;</doc>";
1358 const ExtFaults faults[]
1359 = {{"<", "Incomplete element declaration not faulted", NULL,
1360 XML_ERROR_UNCLOSED_TOKEN},
1361 {"<\xe2\x82", /* First two bytes of a three-byte char */
1362 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1363 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1364 XML_ERROR_PARTIAL_CHAR},
1365 {NULL, NULL, NULL, XML_ERROR_NONE}};
1366 const ExtFaults *fault = faults;
1367
1368 for (; fault->parse_text != NULL; fault++) {
1369 set_subtest("\"%s\"", fault->parse_text);
1370 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1371 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1372 XML_SetUserData(g_parser, (void *)fault);
1373 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1374 "Parser did not report external entity error");
1375 XML_ParserReset(g_parser, NULL);
1376 }
1377 }
1378 END_TEST
1379
1380 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1381 START_TEST(test_dtd_default_handling) {
1382 const char *text = "<!DOCTYPE doc [\n"
1383 "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1384 "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1385 "<!ELEMENT doc EMPTY>\n"
1386 "<!ATTLIST doc a CDATA #IMPLIED>\n"
1387 "<?pi in dtd?>\n"
1388 "<!--comment in dtd-->\n"
1389 "]><doc/>";
1390
1391 XML_SetDefaultHandler(g_parser, accumulate_characters);
1392 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1393 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1394 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1395 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1396 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1397 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1398 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1399 XML_SetCommentHandler(g_parser, dummy_comment_handler);
1400 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1401 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1402 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1403 }
1404 END_TEST
1405
1406 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1407 START_TEST(test_dtd_attr_handling) {
1408 const char *prolog = "<!DOCTYPE doc [\n"
1409 "<!ELEMENT doc EMPTY>\n";
1410 AttTest attr_data[]
1411 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1412 "]>"
1413 "<doc a='two'/>",
1414 XCS("doc"), XCS("a"),
1415 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1416 NULL, XML_TRUE},
1417 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1418 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1419 "]>"
1420 "<doc/>",
1421 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1422 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1423 "]>"
1424 "<doc/>",
1425 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1426 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1427 "]>"
1428 "<doc/>",
1429 XCS("doc"), XCS("a"), XCS("CDATA"),
1430 #ifdef XML_UNICODE
1431 XCS("\x06f2"),
1432 #else
1433 XCS("\xdb\xb2"),
1434 #endif
1435 XML_FALSE},
1436 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1437 AttTest *test;
1438
1439 for (test = attr_data; test->definition != NULL; test++) {
1440 set_subtest("%s", test->definition);
1441 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1442 XML_SetUserData(g_parser, test);
1443 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1444 XML_FALSE)
1445 == XML_STATUS_ERROR)
1446 xml_failure(g_parser);
1447 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1448 (int)strlen(test->definition), XML_TRUE)
1449 == XML_STATUS_ERROR)
1450 xml_failure(g_parser);
1451 XML_ParserReset(g_parser, NULL);
1452 }
1453 }
1454 END_TEST
1455
1456 /* See related SF bug #673791.
1457 When namespace processing is enabled, setting the namespace URI for
1458 a prefix is not allowed; this test ensures that it *is* allowed
1459 when namespace processing is not enabled.
1460 (See Namespaces in XML, section 2.)
1461 */
START_TEST(test_empty_ns_without_namespaces)1462 START_TEST(test_empty_ns_without_namespaces) {
1463 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1464 " <e xmlns:prefix=''/>\n"
1465 "</doc>";
1466
1467 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1468 == XML_STATUS_ERROR)
1469 xml_failure(g_parser);
1470 }
1471 END_TEST
1472
1473 /* Regression test for SF bug #824420.
1474 Checks that an xmlns:prefix attribute set in an attribute's default
1475 value isn't misinterpreted.
1476 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1477 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1478 const char *text = "<!DOCTYPE e:element [\n"
1479 " <!ATTLIST e:element\n"
1480 " xmlns:e CDATA 'http://example.org/'>\n"
1481 " ]>\n"
1482 "<e:element/>";
1483
1484 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1485 == XML_STATUS_ERROR)
1486 xml_failure(g_parser);
1487 }
1488 END_TEST
1489
1490 /* Regression test for SF bug #1515266: missing check of stopped
1491 parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1492 START_TEST(test_stop_parser_between_char_data_calls) {
1493 /* The sample data must be big enough that there are two calls to
1494 the character data handler from within the inner "for" loop of
1495 the XML_TOK_DATA_CHARS case in doContent(), and the character
1496 handler must stop the parser and clear the character data
1497 handler.
1498 */
1499 const char *text = long_character_data_text;
1500
1501 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1502 g_resumable = XML_FALSE;
1503 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1504 != XML_STATUS_ERROR)
1505 xml_failure(g_parser);
1506 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1507 xml_failure(g_parser);
1508 }
1509 END_TEST
1510
1511 /* Regression test for SF bug #1515266: missing check of stopped
1512 parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1513 START_TEST(test_suspend_parser_between_char_data_calls) {
1514 /* The sample data must be big enough that there are two calls to
1515 the character data handler from within the inner "for" loop of
1516 the XML_TOK_DATA_CHARS case in doContent(), and the character
1517 handler must stop the parser and clear the character data
1518 handler.
1519 */
1520 const char *text = long_character_data_text;
1521
1522 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1523 g_resumable = XML_TRUE;
1524 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1525 // we won't know exactly how much input we actually managed to give Expat.
1526 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1527 != XML_STATUS_SUSPENDED)
1528 xml_failure(g_parser);
1529 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1530 xml_failure(g_parser);
1531 /* Try parsing directly */
1532 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1533 != XML_STATUS_ERROR)
1534 fail("Attempt to continue parse while suspended not faulted");
1535 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1536 fail("Suspended parse not faulted with correct error");
1537 }
1538 END_TEST
1539
1540 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1541 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1542 const char *text = long_character_data_text;
1543
1544 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1545 g_resumable = XML_FALSE;
1546 g_abortable = XML_FALSE;
1547 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1548 != XML_STATUS_ERROR)
1549 fail("Failed to double-stop parser");
1550
1551 XML_ParserReset(g_parser, NULL);
1552 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1553 g_resumable = XML_TRUE;
1554 g_abortable = XML_FALSE;
1555 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1556 // we won't know exactly how much input we actually managed to give Expat.
1557 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1558 != XML_STATUS_SUSPENDED)
1559 fail("Failed to double-suspend parser");
1560
1561 XML_ParserReset(g_parser, NULL);
1562 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1563 g_resumable = XML_TRUE;
1564 g_abortable = XML_TRUE;
1565 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1566 != XML_STATUS_ERROR)
1567 fail("Failed to suspend-abort parser");
1568 }
1569 END_TEST
1570
START_TEST(test_good_cdata_ascii)1571 START_TEST(test_good_cdata_ascii) {
1572 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1573 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1574
1575 CharData storage;
1576 CharData_Init(&storage);
1577 XML_SetUserData(g_parser, &storage);
1578 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1579 /* Add start and end handlers for coverage */
1580 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1581 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1582
1583 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1584 == XML_STATUS_ERROR)
1585 xml_failure(g_parser);
1586 CharData_CheckXMLChars(&storage, expected);
1587
1588 /* Try again, this time with a default handler */
1589 XML_ParserReset(g_parser, NULL);
1590 CharData_Init(&storage);
1591 XML_SetUserData(g_parser, &storage);
1592 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1593 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1594
1595 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1596 == XML_STATUS_ERROR)
1597 xml_failure(g_parser);
1598 CharData_CheckXMLChars(&storage, expected);
1599 }
1600 END_TEST
1601
START_TEST(test_good_cdata_utf16)1602 START_TEST(test_good_cdata_utf16) {
1603 /* Test data is:
1604 * <?xml version='1.0' encoding='utf-16'?>
1605 * <a><![CDATA[hello]]></a>
1606 */
1607 const char text[]
1608 = "\0<\0?\0x\0m\0l\0"
1609 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1610 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1611 "1\0"
1612 "6\0'"
1613 "\0?\0>\0\n"
1614 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1615 const XML_Char *expected = XCS("hello");
1616
1617 CharData storage;
1618 CharData_Init(&storage);
1619 XML_SetUserData(g_parser, &storage);
1620 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1621
1622 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1623 == XML_STATUS_ERROR)
1624 xml_failure(g_parser);
1625 CharData_CheckXMLChars(&storage, expected);
1626 }
1627 END_TEST
1628
START_TEST(test_good_cdata_utf16_le)1629 START_TEST(test_good_cdata_utf16_le) {
1630 /* Test data is:
1631 * <?xml version='1.0' encoding='utf-16'?>
1632 * <a><![CDATA[hello]]></a>
1633 */
1634 const char text[]
1635 = "<\0?\0x\0m\0l\0"
1636 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1637 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1638 "1\0"
1639 "6\0'"
1640 "\0?\0>\0\n"
1641 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1642 const XML_Char *expected = XCS("hello");
1643
1644 CharData storage;
1645 CharData_Init(&storage);
1646 XML_SetUserData(g_parser, &storage);
1647 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1648
1649 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1650 == XML_STATUS_ERROR)
1651 xml_failure(g_parser);
1652 CharData_CheckXMLChars(&storage, expected);
1653 }
1654 END_TEST
1655
1656 /* Test UTF16 conversion of a long cdata string */
1657
/* The 16 characters 'A'..'P', each written as a two-byte unit with a zero
   byte first: a handy macro to reduce visual clutter in the long test data
   below.  It expands to a string literal, so adjacent uses concatenate. */
#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1660
START_TEST(test_long_cdata_utf16)1661 START_TEST(test_long_cdata_utf16) {
1662 /* Test data is:
1663 * <?xlm version='1.0' encoding='utf-16'?>
1664 * <a><![CDATA[
1665 * ABCDEFGHIJKLMNOP
1666 * ]]></a>
1667 */
1668 const char text[]
1669 = "\0<\0?\0x\0m\0l\0 "
1670 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1671 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1672 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1673 /* 64 characters per line */
1674 /* clang-format off */
1675 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1676 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1677 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1678 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1679 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1680 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1681 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1682 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1683 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1684 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1685 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1686 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1687 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1688 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1689 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1690 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1691 A_TO_P_IN_UTF16
1692 /* clang-format on */
1693 "\0]\0]\0>\0<\0/\0a\0>";
1694 const XML_Char *expected =
1695 /* clang-format off */
1696 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1697 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1698 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1699 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1700 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1701 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1702 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1703 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1704 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1705 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1706 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1707 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1708 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1709 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1710 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1711 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1712 XCS("ABCDEFGHIJKLMNOP");
1713 /* clang-format on */
1714 CharData storage;
1715 void *buffer;
1716
1717 CharData_Init(&storage);
1718 XML_SetUserData(g_parser, &storage);
1719 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1720 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1721 if (buffer == NULL)
1722 fail("Could not allocate parse buffer");
1723 assert(buffer != NULL);
1724 memcpy(buffer, text, sizeof(text) - 1);
1725 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1726 xml_failure(g_parser);
1727 CharData_CheckXMLChars(&storage, expected);
1728 }
1729 END_TEST
1730
1731 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1732 START_TEST(test_multichar_cdata_utf16) {
1733 /* Test data is:
1734 * <?xml version='1.0' encoding='utf-16'?>
1735 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1736 *
1737 * where {MINIM} is U+1d15e (a minim or half-note)
1738 * UTF-16: 0xd834 0xdd5e
1739 * UTF-8: 0xf0 0x9d 0x85 0x9e
1740 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1741 * UTF-16: 0xd834 0xdd5f
1742 * UTF-8: 0xf0 0x9d 0x85 0x9f
1743 */
1744 const char text[] = "\0<\0?\0x\0m\0l\0"
1745 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1746 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1747 "1\0"
1748 "6\0'"
1749 "\0?\0>\0\n"
1750 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1751 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1752 "\0]\0]\0>\0<\0/\0a\0>";
1753 #ifdef XML_UNICODE
1754 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1755 #else
1756 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1757 #endif
1758 CharData storage;
1759
1760 CharData_Init(&storage);
1761 XML_SetUserData(g_parser, &storage);
1762 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1763
1764 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1765 == XML_STATUS_ERROR)
1766 xml_failure(g_parser);
1767 CharData_CheckXMLChars(&storage, expected);
1768 }
1769 END_TEST
1770
1771 /* Test that an element name with a UTF-16 surrogate pair is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1772 START_TEST(test_utf16_bad_surrogate_pair) {
1773 /* Test data is:
1774 * <?xml version='1.0' encoding='utf-16'?>
1775 * <a><![CDATA[{BADLINB}]]></a>
1776 *
1777 * where {BADLINB} is U+10000 (the first Linear B character)
1778 * with the UTF-16 surrogate pair in the wrong order, i.e.
1779 * 0xdc00 0xd800
1780 */
1781 const char text[] = "\0<\0?\0x\0m\0l\0"
1782 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1783 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1784 "1\0"
1785 "6\0'"
1786 "\0?\0>\0\n"
1787 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1788 "\xdc\x00\xd8\x00"
1789 "\0]\0]\0>\0<\0/\0a\0>";
1790
1791 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1792 != XML_STATUS_ERROR)
1793 fail("Reversed UTF-16 surrogate pair not faulted");
1794 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1795 xml_failure(g_parser);
1796 }
1797 END_TEST
1798
START_TEST(test_bad_cdata)1799 START_TEST(test_bad_cdata) {
1800 struct CaseData {
1801 const char *text;
1802 enum XML_Error expectedError;
1803 };
1804
1805 struct CaseData cases[]
1806 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1807 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1808 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1809 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1810 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1811 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1812 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1813 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1814
1815 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1816 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1817 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1818
1819 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1820 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1821 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1822 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1823 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1824 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1825 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1826
1827 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1828 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1829 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1830
1831 size_t i = 0;
1832 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1833 set_subtest("%s", cases[i].text);
1834 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1835 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1836 const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1837
1838 assert(actualStatus == XML_STATUS_ERROR);
1839
1840 if (actualError != cases[i].expectedError) {
1841 char message[100];
1842 snprintf(message, sizeof(message),
1843 "Expected error %d but got error %d for case %u: \"%s\"\n",
1844 cases[i].expectedError, actualError, (unsigned int)i + 1,
1845 cases[i].text);
1846 fail(message);
1847 }
1848
1849 XML_ParserReset(g_parser, NULL);
1850 }
1851 }
1852 END_TEST
1853
1854 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1855 START_TEST(test_bad_cdata_utf16) {
1856 struct CaseData {
1857 size_t text_bytes;
1858 const char *text;
1859 enum XML_Error expected_error;
1860 };
1861
1862 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1863 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1864 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1865 "1\0"
1866 "6\0'"
1867 "\0?\0>\0\n"
1868 "\0<\0a\0>";
1869 struct CaseData cases[] = {
1870 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1871 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1872 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1873 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1874 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1875 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1876 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1877 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1878 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1879 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1880 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1881 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1882 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1883 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1884 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1885 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1886 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1887 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1888 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1889 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1890 /* Now add a four-byte UTF-16 character */
1891 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1892 XML_ERROR_UNCLOSED_CDATA_SECTION},
1893 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1894 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1895 XML_ERROR_PARTIAL_CHAR},
1896 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1897 XML_ERROR_UNCLOSED_CDATA_SECTION}};
1898 size_t i;
1899
1900 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1901 set_subtest("case %lu", (long unsigned)(i + 1));
1902 enum XML_Status actual_status;
1903 enum XML_Error actual_error;
1904
1905 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1906 XML_FALSE)
1907 == XML_STATUS_ERROR)
1908 xml_failure(g_parser);
1909 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1910 (int)cases[i].text_bytes, XML_TRUE);
1911 assert(actual_status == XML_STATUS_ERROR);
1912 actual_error = XML_GetErrorCode(g_parser);
1913 if (actual_error != cases[i].expected_error) {
1914 char message[1024];
1915
1916 snprintf(message, sizeof(message),
1917 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1918 ") for case %lu\n",
1919 cases[i].expected_error,
1920 XML_ErrorString(cases[i].expected_error), actual_error,
1921 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1922 fail(message);
1923 }
1924 XML_ParserReset(g_parser, NULL);
1925 }
1926 }
1927 END_TEST
1928
1929 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1930 START_TEST(test_stop_parser_between_cdata_calls) {
1931 const char *text = long_cdata_text;
1932
1933 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1934 g_resumable = XML_FALSE;
1935 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1936 }
1937 END_TEST
1938
1939 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1940 START_TEST(test_suspend_parser_between_cdata_calls) {
1941 if (g_chunkSize != 0) {
1942 // this test does not use SINGLE_BYTES, because of suspension
1943 return;
1944 }
1945
1946 const char *text = long_cdata_text;
1947 enum XML_Status result;
1948
1949 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1950 g_resumable = XML_TRUE;
1951 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1952 // we won't know exactly how much input we actually managed to give Expat.
1953 result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
1954 if (result != XML_STATUS_SUSPENDED) {
1955 if (result == XML_STATUS_ERROR)
1956 xml_failure(g_parser);
1957 fail("Parse not suspended in CDATA handler");
1958 }
1959 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1960 xml_failure(g_parser);
1961 }
1962 END_TEST
1963
1964 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1965 START_TEST(test_memory_allocation) {
1966 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1967 char *p;
1968
1969 if (buffer == NULL) {
1970 fail("Allocation failed");
1971 } else {
1972 /* Try writing to memory; some OSes try to cheat! */
1973 buffer[0] = 'T';
1974 buffer[1] = 'E';
1975 buffer[2] = 'S';
1976 buffer[3] = 'T';
1977 buffer[4] = '\0';
1978 if (strcmp(buffer, "TEST") != 0) {
1979 fail("Memory not writable");
1980 } else {
1981 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1982 if (p == NULL) {
1983 fail("Reallocation failed");
1984 } else {
1985 /* Write again, just to be sure */
1986 buffer = p;
1987 buffer[0] = 'V';
1988 if (strcmp(buffer, "VEST") != 0) {
1989 fail("Reallocated memory not writable");
1990 }
1991 }
1992 }
1993 XML_MemFree(g_parser, buffer);
1994 }
1995 }
1996 END_TEST
1997
1998 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1999 START_TEST(test_default_current) {
2000 const char *text = "<doc>hell]</doc>";
2001 const char *entity_text = "<!DOCTYPE doc [\n"
2002 "<!ENTITY entity '%'>\n"
2003 "]>\n"
2004 "<doc>&entity;</doc>";
2005
2006 set_subtest("with defaulting");
2007 {
2008 struct handler_record_list storage;
2009 storage.count = 0;
2010 XML_SetDefaultHandler(g_parser, record_default_handler);
2011 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2012 XML_SetUserData(g_parser, &storage);
2013 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2014 == XML_STATUS_ERROR)
2015 xml_failure(g_parser);
2016 int i = 0;
2017 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2018 // we should have gotten one or more cdata callbacks, totaling 5 chars
2019 int cdata_len_remaining = 5;
2020 while (cdata_len_remaining > 0) {
2021 const struct handler_record_entry *c_entry
2022 = handler_record_get(&storage, i++);
2023 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
2024 assert_true(c_entry->arg > 0);
2025 assert_true(c_entry->arg <= cdata_len_remaining);
2026 cdata_len_remaining -= c_entry->arg;
2027 // default handler must follow, with the exact same len argument.
2028 assert_record_handler_called(&storage, i++, "record_default_handler",
2029 c_entry->arg);
2030 }
2031 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2032 assert_true(storage.count == i);
2033 }
2034
2035 /* Again, without the defaulting */
2036 set_subtest("no defaulting");
2037 {
2038 struct handler_record_list storage;
2039 storage.count = 0;
2040 XML_ParserReset(g_parser, NULL);
2041 XML_SetDefaultHandler(g_parser, record_default_handler);
2042 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2043 XML_SetUserData(g_parser, &storage);
2044 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2045 == XML_STATUS_ERROR)
2046 xml_failure(g_parser);
2047 int i = 0;
2048 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2049 // we should have gotten one or more cdata callbacks, totaling 5 chars
2050 int cdata_len_remaining = 5;
2051 while (cdata_len_remaining > 0) {
2052 const struct handler_record_entry *c_entry
2053 = handler_record_get(&storage, i++);
2054 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
2055 assert_true(c_entry->arg > 0);
2056 assert_true(c_entry->arg <= cdata_len_remaining);
2057 cdata_len_remaining -= c_entry->arg;
2058 }
2059 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2060 assert_true(storage.count == i);
2061 }
2062
2063 /* Now with an internal entity to complicate matters */
2064 set_subtest("with internal entity");
2065 {
2066 struct handler_record_list storage;
2067 storage.count = 0;
2068 XML_ParserReset(g_parser, NULL);
2069 XML_SetDefaultHandler(g_parser, record_default_handler);
2070 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2071 XML_SetUserData(g_parser, &storage);
2072 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2073 XML_TRUE)
2074 == XML_STATUS_ERROR)
2075 xml_failure(g_parser);
2076 /* The default handler suppresses the entity */
2077 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2078 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2079 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2080 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2081 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2082 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2083 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2084 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2085 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2086 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2087 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2088 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2089 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2090 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2091 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2092 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2093 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2094 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
2095 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2096 assert_true(storage.count == 19);
2097 }
2098
2099 /* Again, with a skip handler */
2100 set_subtest("with skip handler");
2101 {
2102 struct handler_record_list storage;
2103 storage.count = 0;
2104 XML_ParserReset(g_parser, NULL);
2105 XML_SetDefaultHandler(g_parser, record_default_handler);
2106 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2107 XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
2108 XML_SetUserData(g_parser, &storage);
2109 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2110 XML_TRUE)
2111 == XML_STATUS_ERROR)
2112 xml_failure(g_parser);
2113 /* The default handler suppresses the entity */
2114 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2115 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2116 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2117 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2118 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2119 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2120 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2121 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2122 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2123 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2124 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2125 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2126 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2127 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2128 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2129 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2130 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2131 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2132 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2133 assert_true(storage.count == 19);
2134 }
2135
2136 /* This time, allow the entity through */
2137 set_subtest("allow entity");
2138 {
2139 struct handler_record_list storage;
2140 storage.count = 0;
2141 XML_ParserReset(g_parser, NULL);
2142 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2143 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2144 XML_SetUserData(g_parser, &storage);
2145 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2146 XML_TRUE)
2147 == XML_STATUS_ERROR)
2148 xml_failure(g_parser);
2149 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2150 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2151 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2152 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2153 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2154 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2155 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2156 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2157 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2158 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2159 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2160 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2161 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2162 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2163 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2164 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2165 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2166 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2167 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2168 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2169 assert_true(storage.count == 20);
2170 }
2171
2172 /* Finally, without passing the cdata to the default handler */
2173 set_subtest("not passing cdata");
2174 {
2175 struct handler_record_list storage;
2176 storage.count = 0;
2177 XML_ParserReset(g_parser, NULL);
2178 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2179 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2180 XML_SetUserData(g_parser, &storage);
2181 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2182 XML_TRUE)
2183 == XML_STATUS_ERROR)
2184 xml_failure(g_parser);
2185 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2186 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2187 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2188 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2189 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2190 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2191 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2192 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2193 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2194 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2195 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2196 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2197 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2198 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2199 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2200 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2201 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2202 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2203 1);
2204 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2205 assert_true(storage.count == 19);
2206 }
2207 }
2208 END_TEST
2209
2210 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2211 START_TEST(test_dtd_elements) {
2212 const char *text = "<!DOCTYPE doc [\n"
2213 "<!ELEMENT doc (chapter)>\n"
2214 "<!ELEMENT chapter (#PCDATA)>\n"
2215 "]>\n"
2216 "<doc><chapter>Wombats are go</chapter></doc>";
2217
2218 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2219 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2220 == XML_STATUS_ERROR)
2221 xml_failure(g_parser);
2222 }
2223 END_TEST
2224
2225 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2226 element_decl_check_model(void *userData, const XML_Char *name,
2227 XML_Content *model) {
2228 UNUSED_P(userData);
2229 uint32_t errorFlags = 0;
2230
2231 /* Expected model array structure is this:
2232 * [0] (type 6, quant 0)
2233 * [1] (type 5, quant 0)
2234 * [3] (type 4, quant 0, name "bar")
2235 * [4] (type 4, quant 0, name "foo")
2236 * [5] (type 4, quant 3, name "xyz")
2237 * [2] (type 4, quant 2, name "zebra")
2238 */
2239 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2240 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2241
2242 if (model != NULL) {
2243 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2244 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2245 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2246 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2247 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2248
2249 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2250 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2251 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2252 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2253 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2254
2255 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2256 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2257 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2258 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2259 errorFlags
2260 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2261
2262 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2263 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2264 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2265 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2266 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2267
2268 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2269 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2270 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2271 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2272 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2273
2274 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2275 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2276 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2277 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2278 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2279 }
2280
2281 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2282 XML_FreeContentModel(g_parser, model);
2283 }
2284
START_TEST(test_dtd_elements_nesting)2285 START_TEST(test_dtd_elements_nesting) {
2286 // Payload inspired by a test in Perl's XML::Parser
2287 const char *text = "<!DOCTYPE foo [\n"
2288 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2289 "]>\n"
2290 "<foo/>";
2291
2292 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2293
2294 XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2295 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2296 == XML_STATUS_ERROR)
2297 xml_failure(g_parser);
2298
2299 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2300 fail("Element declaration model regression detected");
2301 }
2302 END_TEST
2303
2304 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2305 START_TEST(test_set_foreign_dtd) {
2306 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2307 const char *text2 = "<doc>&entity;</doc>";
2308 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2309
2310 /* Check hash salt is passed through too */
2311 XML_SetHashSalt(g_parser, 0x12345678);
2312 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2313 XML_SetUserData(g_parser, &test_data);
2314 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2315 /* Add a default handler to exercise more code paths */
2316 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2317 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2318 fail("Could not set foreign DTD");
2319 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2320 == XML_STATUS_ERROR)
2321 xml_failure(g_parser);
2322
2323 /* Ensure that trying to set the DTD after parsing has started
2324 * is faulted, even if it's the same setting.
2325 */
2326 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2327 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2328 fail("Failed to reject late foreign DTD setting");
2329 /* Ditto for the hash salt */
2330 if (XML_SetHashSalt(g_parser, 0x23456789))
2331 fail("Failed to reject late hash salt change");
2332
2333 /* Now finish the parse */
2334 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2335 == XML_STATUS_ERROR)
2336 xml_failure(g_parser);
2337 }
2338 END_TEST
2339
2340 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2341 START_TEST(test_foreign_dtd_not_standalone) {
2342 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2343 "<doc>&entity;</doc>";
2344 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2345
2346 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2347 XML_SetUserData(g_parser, &test_data);
2348 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2349 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2350 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2351 fail("Could not set foreign DTD");
2352 expect_failure(text, XML_ERROR_NOT_STANDALONE,
2353 "NotStandalonehandler failed to reject");
2354 }
2355 END_TEST
2356
2357 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2358 START_TEST(test_invalid_foreign_dtd) {
2359 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2360 "<doc>&entity;</doc>";
2361 ExtFaults test_data
2362 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2363
2364 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2365 XML_SetUserData(g_parser, &test_data);
2366 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2367 XML_UseForeignDTD(g_parser, XML_TRUE);
2368 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2369 "Bad DTD should not have been accepted");
2370 }
2371 END_TEST
2372
2373 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2374 START_TEST(test_foreign_dtd_with_doctype) {
2375 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2376 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2377 const char *text2 = "<doc>&entity;</doc>";
2378 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2379
2380 /* Check hash salt is passed through too */
2381 XML_SetHashSalt(g_parser, 0x12345678);
2382 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2383 XML_SetUserData(g_parser, &test_data);
2384 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2385 /* Add a default handler to exercise more code paths */
2386 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2387 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2388 fail("Could not set foreign DTD");
2389 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2390 == XML_STATUS_ERROR)
2391 xml_failure(g_parser);
2392
2393 /* Ensure that trying to set the DTD after parsing has started
2394 * is faulted, even if it's the same setting.
2395 */
2396 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2397 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2398 fail("Failed to reject late foreign DTD setting");
2399 /* Ditto for the hash salt */
2400 if (XML_SetHashSalt(g_parser, 0x23456789))
2401 fail("Failed to reject late hash salt change");
2402
2403 /* Now finish the parse */
2404 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2405 == XML_STATUS_ERROR)
2406 xml_failure(g_parser);
2407 }
2408 END_TEST
2409
2410 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2411 START_TEST(test_foreign_dtd_without_external_subset) {
2412 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2413 "<doc>&foo;</doc>";
2414
2415 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2416 XML_SetUserData(g_parser, NULL);
2417 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2418 XML_UseForeignDTD(g_parser, XML_TRUE);
2419 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2420 == XML_STATUS_ERROR)
2421 xml_failure(g_parser);
2422 }
2423 END_TEST
2424
START_TEST(test_empty_foreign_dtd)2425 START_TEST(test_empty_foreign_dtd) {
2426 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2427 "<doc>&entity;</doc>";
2428
2429 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2430 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2431 XML_UseForeignDTD(g_parser, XML_TRUE);
2432 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2433 "Undefined entity not faulted");
2434 }
2435 END_TEST
2436
2437 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2438 START_TEST(test_set_base) {
2439 const XML_Char *old_base;
2440 const XML_Char *new_base = XCS("/local/file/name.xml");
2441
2442 old_base = XML_GetBase(g_parser);
2443 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2444 fail("Unable to set base");
2445 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2446 fail("Base setting not correct");
2447 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2448 fail("Unable to NULL base");
2449 if (XML_GetBase(g_parser) != NULL)
2450 fail("Base setting not nulled");
2451 XML_SetBase(g_parser, old_base);
2452 }
2453 END_TEST
2454
2455 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2456 START_TEST(test_attributes) {
2457 const char *text = "<!DOCTYPE doc [\n"
2458 "<!ELEMENT doc (tag)>\n"
2459 "<!ATTLIST doc id ID #REQUIRED>\n"
2460 "]>"
2461 "<doc a='1' id='one' b='2'>"
2462 "<tag c='3'/>"
2463 "</doc>";
2464 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2465 {XCS("b"), XCS("2")},
2466 {XCS("id"), XCS("one")},
2467 {NULL, NULL}};
2468 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2469 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2470 {XCS("tag"), 1, NULL, NULL},
2471 {NULL, 0, NULL, NULL}};
2472 info[0].attributes = doc_info;
2473 info[1].attributes = tag_info;
2474
2475 XML_Parser parser = XML_ParserCreate(NULL);
2476 assert_true(parser != NULL);
2477 ParserAndElementInfo parserAndElementInfos = {
2478 parser,
2479 info,
2480 };
2481
2482 XML_SetStartElementHandler(parser, counting_start_element_handler);
2483 XML_SetUserData(parser, &parserAndElementInfos);
2484 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2485 == XML_STATUS_ERROR)
2486 xml_failure(parser);
2487
2488 XML_ParserFree(parser);
2489 }
2490 END_TEST
2491
2492 /* Test reset works correctly in the middle of processing an internal
2493 * entity. Exercises some obscure code in XML_ParserReset().
2494 */
START_TEST(test_reset_in_entity)2495 START_TEST(test_reset_in_entity) {
2496 if (g_chunkSize != 0) {
2497 // this test does not use SINGLE_BYTES, because of suspension
2498 return;
2499 }
2500
2501 const char *text = "<!DOCTYPE doc [\n"
2502 "<!ENTITY wombat 'wom'>\n"
2503 "<!ENTITY entity 'hi &wom; there'>\n"
2504 "]>\n"
2505 "<doc>&entity;</doc>";
2506 XML_ParsingStatus status;
2507
2508 g_resumable = XML_TRUE;
2509 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2510 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
2511 // we won't know exactly how much input we actually managed to give Expat.
2512 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2513 == XML_STATUS_ERROR)
2514 xml_failure(g_parser);
2515 XML_GetParsingStatus(g_parser, &status);
2516 if (status.parsing != XML_SUSPENDED)
2517 fail("Parsing status not SUSPENDED");
2518 XML_ParserReset(g_parser, NULL);
2519 XML_GetParsingStatus(g_parser, &status);
2520 if (status.parsing != XML_INITIALIZED)
2521 fail("Parsing status doesn't reset to INITIALIZED");
2522 }
2523 END_TEST
2524
2525 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2526 START_TEST(test_resume_invalid_parse) {
2527 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2528
2529 g_resumable = XML_TRUE;
2530 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2531 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2532 == XML_STATUS_ERROR)
2533 xml_failure(g_parser);
2534 if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2535 fail("Resumed invalid parse not faulted");
2536 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2537 fail("Invalid parse not correctly faulted");
2538 }
2539 END_TEST
2540
2541 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2542 START_TEST(test_resume_resuspended) {
2543 const char *text = "<doc>Hello<meep/>world</doc>";
2544
2545 g_resumable = XML_TRUE;
2546 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2547 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2548 == XML_STATUS_ERROR)
2549 xml_failure(g_parser);
2550 g_resumable = XML_TRUE;
2551 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2552 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2553 fail("Resumption not suspended");
2554 /* This one should succeed and finish up */
2555 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2556 xml_failure(g_parser);
2557 }
2558 END_TEST
2559
2560 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2561 START_TEST(test_cdata_default) {
2562 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2563 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2564 CharData storage;
2565
2566 CharData_Init(&storage);
2567 XML_SetUserData(g_parser, &storage);
2568 XML_SetDefaultHandler(g_parser, accumulate_characters);
2569
2570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2571 == XML_STATUS_ERROR)
2572 xml_failure(g_parser);
2573 CharData_CheckXMLChars(&storage, expected);
2574 }
2575 END_TEST
2576
2577 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2578 START_TEST(test_subordinate_reset) {
2579 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2580 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2581 "<doc>&entity;</doc>";
2582
2583 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2584 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2585 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2586 == XML_STATUS_ERROR)
2587 xml_failure(g_parser);
2588 }
2589 END_TEST
2590
2591 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2592 START_TEST(test_subordinate_suspend) {
2593 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2594 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2595 "<doc>&entity;</doc>";
2596
2597 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2598 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2599 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2600 == XML_STATUS_ERROR)
2601 xml_failure(g_parser);
2602 }
2603 END_TEST
2604
2605 /* Test suspending a subordinate parser from an XML declaration */
2606 /* Increases code coverage of the tests */
2607
START_TEST(test_subordinate_xdecl_suspend)2608 START_TEST(test_subordinate_xdecl_suspend) {
2609 const char *text
2610 = "<!DOCTYPE doc [\n"
2611 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2612 "]>\n"
2613 "<doc>&entity;</doc>";
2614
2615 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2616 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2617 g_resumable = XML_TRUE;
2618 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2619 == XML_STATUS_ERROR)
2620 xml_failure(g_parser);
2621 }
2622 END_TEST
2623
START_TEST(test_subordinate_xdecl_abort)2624 START_TEST(test_subordinate_xdecl_abort) {
2625 const char *text
2626 = "<!DOCTYPE doc [\n"
2627 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2628 "]>\n"
2629 "<doc>&entity;</doc>";
2630
2631 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2632 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2633 g_resumable = XML_FALSE;
2634 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2635 == XML_STATUS_ERROR)
2636 xml_failure(g_parser);
2637 }
2638 END_TEST
2639
2640 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2641 START_TEST(test_ext_entity_invalid_suspended_parse) {
2642 const char *text = "<!DOCTYPE doc [\n"
2643 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2644 "]>\n"
2645 "<doc>&en;</doc>";
2646 ExtFaults faults[]
2647 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2648 "Incomplete element declaration not faulted", NULL,
2649 XML_ERROR_UNCLOSED_TOKEN},
2650 {/* First two bytes of a three-byte char */
2651 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2652 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2653 {NULL, NULL, NULL, XML_ERROR_NONE}};
2654 ExtFaults *fault;
2655
2656 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2657 set_subtest("%s", fault->parse_text);
2658 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2659 XML_SetExternalEntityRefHandler(g_parser,
2660 external_entity_suspending_faulter);
2661 XML_SetUserData(g_parser, fault);
2662 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2663 "Parser did not report external entity error");
2664 XML_ParserReset(g_parser, NULL);
2665 }
2666 }
2667 END_TEST
2668
2669 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2670 START_TEST(test_explicit_encoding) {
2671 const char *text1 = "<doc>Hello ";
2672 const char *text2 = " World</doc>";
2673
2674 /* Just check that we can set the encoding to NULL before starting */
2675 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2676 fail("Failed to initialise encoding to NULL");
2677 /* Say we are UTF-8 */
2678 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2679 fail("Failed to set explicit encoding");
2680 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2681 == XML_STATUS_ERROR)
2682 xml_failure(g_parser);
2683 /* Try to switch encodings mid-parse */
2684 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2685 fail("Allowed encoding change");
2686 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2687 == XML_STATUS_ERROR)
2688 xml_failure(g_parser);
2689 /* Try now the parse is over */
2690 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2691 fail("Failed to unset encoding");
2692 }
2693 END_TEST
2694
2695 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2696 START_TEST(test_trailing_cr) {
2697 const char *text = "<doc>\r";
2698 int found_cr;
2699
2700 /* Try with a character handler, for code coverage */
2701 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2702 XML_SetUserData(g_parser, &found_cr);
2703 found_cr = 0;
2704 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2705 == XML_STATUS_OK)
2706 fail("Failed to fault unclosed doc");
2707 if (found_cr == 0)
2708 fail("Did not catch the carriage return");
2709 XML_ParserReset(g_parser, NULL);
2710
2711 /* Now with a default handler instead */
2712 XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2713 XML_SetUserData(g_parser, &found_cr);
2714 found_cr = 0;
2715 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2716 == XML_STATUS_OK)
2717 fail("Failed to fault unclosed doc");
2718 if (found_cr == 0)
2719 fail("Did not catch default carriage return");
2720 }
2721 END_TEST
2722
2723 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2724 START_TEST(test_ext_entity_trailing_cr) {
2725 const char *text = "<!DOCTYPE doc [\n"
2726 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2727 "]>\n"
2728 "<doc>&en;</doc>";
2729 int found_cr;
2730
2731 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2732 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2733 XML_SetUserData(g_parser, &found_cr);
2734 found_cr = 0;
2735 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2736 != XML_STATUS_OK)
2737 xml_failure(g_parser);
2738 if (found_cr == 0)
2739 fail("No carriage return found");
2740 XML_ParserReset(g_parser, NULL);
2741
2742 /* Try again with a different trailing CR */
2743 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2744 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2745 XML_SetUserData(g_parser, &found_cr);
2746 found_cr = 0;
2747 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2748 != XML_STATUS_OK)
2749 xml_failure(g_parser);
2750 if (found_cr == 0)
2751 fail("No carriage return found");
2752 }
2753 END_TEST
2754
2755 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2756 START_TEST(test_trailing_rsqb) {
2757 const char *text8 = "<doc>]";
2758 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2759 int found_rsqb;
2760 int text8_len = (int)strlen(text8);
2761
2762 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2763 XML_SetUserData(g_parser, &found_rsqb);
2764 found_rsqb = 0;
2765 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2766 == XML_STATUS_OK)
2767 fail("Failed to fault unclosed doc");
2768 if (found_rsqb == 0)
2769 fail("Did not catch the right square bracket");
2770
2771 /* Try again with a different encoding */
2772 XML_ParserReset(g_parser, NULL);
2773 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2774 XML_SetUserData(g_parser, &found_rsqb);
2775 found_rsqb = 0;
2776 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2777 XML_TRUE)
2778 == XML_STATUS_OK)
2779 fail("Failed to fault unclosed doc");
2780 if (found_rsqb == 0)
2781 fail("Did not catch the right square bracket");
2782
2783 /* And finally with a default handler */
2784 XML_ParserReset(g_parser, NULL);
2785 XML_SetDefaultHandler(g_parser, rsqb_handler);
2786 XML_SetUserData(g_parser, &found_rsqb);
2787 found_rsqb = 0;
2788 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2789 XML_TRUE)
2790 == XML_STATUS_OK)
2791 fail("Failed to fault unclosed doc");
2792 if (found_rsqb == 0)
2793 fail("Did not catch the right square bracket");
2794 }
2795 END_TEST
2796
2797 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2798 START_TEST(test_ext_entity_trailing_rsqb) {
2799 const char *text = "<!DOCTYPE doc [\n"
2800 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2801 "]>\n"
2802 "<doc>&en;</doc>";
2803 int found_rsqb;
2804
2805 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2806 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2807 XML_SetUserData(g_parser, &found_rsqb);
2808 found_rsqb = 0;
2809 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2810 != XML_STATUS_OK)
2811 xml_failure(g_parser);
2812 if (found_rsqb == 0)
2813 fail("No right square bracket found");
2814 }
2815 END_TEST
2816
2817 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2818 START_TEST(test_ext_entity_good_cdata) {
2819 const char *text = "<!DOCTYPE doc [\n"
2820 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2821 "]>\n"
2822 "<doc>&en;</doc>";
2823
2824 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2825 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2826 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2827 != XML_STATUS_OK)
2828 xml_failure(g_parser);
2829 }
2830 END_TEST
2831
2832 /* Test user parameter settings */
START_TEST(test_user_parameters)2833 START_TEST(test_user_parameters) {
2834 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2835 "<!-- Primary parse -->\n"
2836 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2837 "<doc>&entity;";
2838 const char *epilog = "<!-- Back to primary parser -->\n"
2839 "</doc>";
2840
2841 g_comment_count = 0;
2842 g_skip_count = 0;
2843 g_xdecl_count = 0;
2844 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2845 XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2846 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2847 XML_SetCommentHandler(g_parser, data_check_comment_handler);
2848 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2849 XML_UseParserAsHandlerArg(g_parser);
2850 XML_SetUserData(g_parser, (void *)1);
2851 g_handler_data = g_parser;
2852 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2853 == XML_STATUS_ERROR)
2854 xml_failure(g_parser);
2855 /* Ensure we can't change policy mid-parse */
2856 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2857 fail("Changed param entity parsing policy while parsing");
2858 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2859 == XML_STATUS_ERROR)
2860 xml_failure(g_parser);
2861 if (g_comment_count != 3)
2862 fail("Comment handler not invoked enough times");
2863 if (g_skip_count != 1)
2864 fail("Skip handler not invoked enough times");
2865 if (g_xdecl_count != 1)
2866 fail("XML declaration handler not invoked");
2867 }
2868 END_TEST
2869
2870 /* Test that an explicit external entity handler argument replaces
2871 * the parser as the first argument.
2872 *
2873 * We do not call the first parameter to the external entity handler
2874 * 'parser' for once, since the first time the handler is called it
2875 * will actually be a text string. We need to be able to access the
2876 * global 'parser' variable to create our external entity parser from,
2877 * since there are code paths we need to ensure get executed.
2878 */
START_TEST(test_ext_entity_ref_parameter)2879 START_TEST(test_ext_entity_ref_parameter) {
2880 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2881 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2882 "<doc>&entity;</doc>";
2883
2884 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2885 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2886 /* Set a handler arg that is not NULL and not parser (which is
2887 * what NULL would cause to be passed.
2888 */
2889 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2890 g_handler_data = text;
2891 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2892 == XML_STATUS_ERROR)
2893 xml_failure(g_parser);
2894
2895 /* Now try again with unset args */
2896 XML_ParserReset(g_parser, NULL);
2897 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2898 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2899 XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2900 g_handler_data = g_parser;
2901 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2902 == XML_STATUS_ERROR)
2903 xml_failure(g_parser);
2904 }
2905 END_TEST
2906
2907 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2908 START_TEST(test_empty_parse) {
2909 const char *text = "<doc></doc>";
2910 const char *partial = "<doc>";
2911
2912 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2913 fail("Parsing empty string faulted");
2914 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2915 fail("Parsing final empty string not faulted");
2916 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2917 fail("Parsing final empty string faulted for wrong reason");
2918
2919 /* Now try with valid text before the empty end */
2920 XML_ParserReset(g_parser, NULL);
2921 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2922 == XML_STATUS_ERROR)
2923 xml_failure(g_parser);
2924 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2925 fail("Parsing final empty string faulted");
2926
2927 /* Now try with invalid text before the empty end */
2928 XML_ParserReset(g_parser, NULL);
2929 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2930 XML_FALSE)
2931 == XML_STATUS_ERROR)
2932 xml_failure(g_parser);
2933 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2934 fail("Parsing final incomplete empty string not faulted");
2935 }
2936 END_TEST
2937
2938 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2939 START_TEST(test_negative_len_parse) {
2940 const char *const doc = "<root/>";
2941 for (int isFinal = 0; isFinal < 2; isFinal++) {
2942 set_subtest("isFinal=%d", isFinal);
2943
2944 XML_Parser parser = XML_ParserCreate(NULL);
2945
2946 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2947 fail("There was not supposed to be any initial parse error.");
2948
2949 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2950
2951 if (status != XML_STATUS_ERROR)
2952 fail("Negative len was expected to fail the parse but did not.");
2953
2954 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2955 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2956
2957 XML_ParserFree(parser);
2958 }
2959 }
2960 END_TEST
2961
2962 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2963 START_TEST(test_negative_len_parse_buffer) {
2964 const char *const doc = "<root/>";
2965 for (int isFinal = 0; isFinal < 2; isFinal++) {
2966 set_subtest("isFinal=%d", isFinal);
2967
2968 XML_Parser parser = XML_ParserCreate(NULL);
2969
2970 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2971 fail("There was not supposed to be any initial parse error.");
2972
2973 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2974
2975 if (buffer == NULL)
2976 fail("XML_GetBuffer failed.");
2977
2978 memcpy(buffer, doc, strlen(doc));
2979
2980 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2981
2982 if (status != XML_STATUS_ERROR)
2983 fail("Negative len was expected to fail the parse but did not.");
2984
2985 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2986 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2987
2988 XML_ParserFree(parser);
2989 }
2990 }
2991 END_TEST
2992
/* Helper: look up the value of one feature in the XML_GetFeatureList() table */
2994 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2995 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2996 const XML_Feature *feature = XML_GetFeatureList();
2997
2998 if (feature == NULL)
2999 return XML_STATUS_ERROR;
3000 for (; feature->feature != XML_FEATURE_END; feature++) {
3001 if (feature->feature == feature_id) {
3002 *presult = feature->value;
3003 return XML_STATUS_OK;
3004 }
3005 }
3006 return XML_STATUS_ERROR;
3007 }
3008
3009 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)3010 START_TEST(test_get_buffer_1) {
3011 const char *text = get_buffer_test_text;
3012 void *buffer;
3013 long context_bytes;
3014
3015 /* Attempt to allocate a negative length buffer */
3016 if (XML_GetBuffer(g_parser, -12) != NULL)
3017 fail("Negative length buffer not failed");
3018
3019 /* Now get a small buffer and extend it past valid length */
3020 buffer = XML_GetBuffer(g_parser, 1536);
3021 if (buffer == NULL)
3022 fail("1.5K buffer failed");
3023 assert(buffer != NULL);
3024 memcpy(buffer, text, strlen(text));
3025 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3026 == XML_STATUS_ERROR)
3027 xml_failure(g_parser);
3028 if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
3029 fail("INT_MAX buffer not failed");
3030
3031 /* Now try extending it a more reasonable but still too large
3032 * amount. The allocator in XML_GetBuffer() doubles the buffer
3033 * size until it exceeds the requested amount or INT_MAX. If it
3034 * exceeds INT_MAX, it rejects the request, so we want a request
3035 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
3036 * with an extra byte just to ensure that the request is off any
3037 * boundary. The request will be inflated internally by
3038 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
3039 * request.
3040 */
3041 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
3042 context_bytes = 0;
3043 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
3044 fail("INT_MAX- buffer not failed");
3045
3046 /* Now try extending it a carefully crafted amount */
3047 if (XML_GetBuffer(g_parser, 1000) == NULL)
3048 fail("1000 buffer failed");
3049 }
3050 END_TEST
3051
3052 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)3053 START_TEST(test_get_buffer_2) {
3054 const char *text = get_buffer_test_text;
3055 void *buffer;
3056
3057 /* Now get a decent buffer */
3058 buffer = XML_GetBuffer(g_parser, 1536);
3059 if (buffer == NULL)
3060 fail("1.5K buffer failed");
3061 assert(buffer != NULL);
3062 memcpy(buffer, text, strlen(text));
3063 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3064 == XML_STATUS_ERROR)
3065 xml_failure(g_parser);
3066
3067 /* Extend it, to catch a different code path */
3068 if (XML_GetBuffer(g_parser, 1024) == NULL)
3069 fail("1024 buffer failed");
3070 }
3071 END_TEST
3072
3073 /* Test for signed integer overflow CVE-2022-23852 */
3074 #if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow)3075 START_TEST(test_get_buffer_3_overflow) {
3076 XML_Parser parser = XML_ParserCreate(NULL);
3077 assert(parser != NULL);
3078
3079 const char *const text = "\n";
3080 const int expectedKeepValue = (int)strlen(text);
3081
3082 // After this call, variable "keep" in XML_GetBuffer will
3083 // have value expectedKeepValue
3084 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
3085 XML_FALSE /* isFinal */)
3086 == XML_STATUS_ERROR)
3087 xml_failure(parser);
3088
3089 assert(expectedKeepValue > 0);
3090 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
3091 fail("enlarging buffer not failed");
3092
3093 XML_ParserFree(parser);
3094 }
3095 END_TEST
3096 #endif // XML_CONTEXT_BYTES > 0
3097
START_TEST(test_buffer_can_grow_to_max)3098 START_TEST(test_buffer_can_grow_to_max) {
3099 const char *const prefixes[] = {
3100 "",
3101 "<",
3102 "<x a='",
3103 "<doc><x a='",
3104 "<document><x a='",
3105 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
3106 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
3107 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
3108 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
3109 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
3110 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
3111 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
3112 #if defined(__MINGW32__) && ! defined(__MINGW64__)
3113 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
3114 // Can we make a big allocation?
3115 void *big = malloc(maxbuf);
3116 if (! big) {
3117 // The big allocation failed. Let's be a little lenient.
3118 maxbuf = maxbuf / 2;
3119 }
3120 free(big);
3121 #endif
3122
3123 for (int i = 0; i < num_prefixes; ++i) {
3124 set_subtest("\"%s\"", prefixes[i]);
3125 XML_Parser parser = XML_ParserCreate(NULL);
3126 #if XML_GE == 1
3127 assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
3128 == XML_TRUE); // i.e. deactivate
3129 #endif
3130 const int prefix_len = (int)strlen(prefixes[i]);
3131 const enum XML_Status s
3132 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3133 if (s != XML_STATUS_OK)
3134 xml_failure(parser);
3135
3136 // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3137 // subtracting the whole prefix is easiest, and close enough.
3138 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3139 // The limit should be consistent; no prefix should allow us to
3140 // reach above the max buffer size.
3141 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3142 XML_ParserFree(parser);
3143 }
3144 }
3145 END_TEST
3146
START_TEST(test_getbuffer_allocates_on_zero_len)3147 START_TEST(test_getbuffer_allocates_on_zero_len) {
3148 for (int first_len = 1; first_len >= 0; first_len--) {
3149 set_subtest("with len=%d first", first_len);
3150 XML_Parser parser = XML_ParserCreate(NULL);
3151 assert_true(parser != NULL);
3152 assert_true(XML_GetBuffer(parser, first_len) != NULL);
3153 assert_true(XML_GetBuffer(parser, 0) != NULL);
3154 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3155 xml_failure(parser);
3156 XML_ParserFree(parser);
3157 }
3158 }
3159 END_TEST
3160
3161 /* Test position information macros */
START_TEST(test_byte_info_at_end)3162 START_TEST(test_byte_info_at_end) {
3163 const char *text = "<doc></doc>";
3164
3165 if (XML_GetCurrentByteIndex(g_parser) != -1
3166 || XML_GetCurrentByteCount(g_parser) != 0)
3167 fail("Byte index/count incorrect at start of parse");
3168 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3169 == XML_STATUS_ERROR)
3170 xml_failure(g_parser);
3171 /* At end, the count will be zero and the index the end of string */
3172 if (XML_GetCurrentByteCount(g_parser) != 0)
3173 fail("Terminal byte count incorrect");
3174 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3175 fail("Terminal byte index incorrect");
3176 }
3177 END_TEST
3178
3179 /* Test position information from errors */
3180 #define PRE_ERROR_STR "<doc></"
3181 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3182 START_TEST(test_byte_info_at_error) {
3183 const char *text = PRE_ERROR_STR POST_ERROR_STR;
3184
3185 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3186 == XML_STATUS_OK)
3187 fail("Syntax error not faulted");
3188 if (XML_GetCurrentByteCount(g_parser) != 0)
3189 fail("Error byte count incorrect");
3190 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3191 fail("Error byte index incorrect");
3192 }
3193 END_TEST
3194 #undef PRE_ERROR_STR
3195 #undef POST_ERROR_STR
3196
3197 /* Test position information in handler */
3198 #define START_ELEMENT "<e>"
3199 #define CDATA_TEXT "Hello"
3200 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3201 START_TEST(test_byte_info_at_cdata) {
3202 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3203 int offset, size;
3204 ByteTestData data;
3205
3206 /* Check initial context is empty */
3207 if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3208 fail("Unexpected context at start of parse");
3209
3210 data.start_element_len = (int)strlen(START_ELEMENT);
3211 data.cdata_len = (int)strlen(CDATA_TEXT);
3212 data.total_string_len = (int)strlen(text);
3213 XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3214 XML_SetUserData(g_parser, &data);
3215 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3216 xml_failure(g_parser);
3217 }
3218 END_TEST
3219 #undef START_ELEMENT
3220 #undef CDATA_TEXT
3221 #undef END_ELEMENT
3222
3223 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3224 START_TEST(test_predefined_entities) {
3225 const char *text = "<doc><>&"'</doc>";
3226 const XML_Char *expected = XCS("<doc><>&"'</doc>");
3227 const XML_Char *result = XCS("<>&\"'");
3228 CharData storage;
3229
3230 XML_SetDefaultHandler(g_parser, accumulate_characters);
3231 /* run_character_check uses XML_SetCharacterDataHandler(), which
3232 * unfortunately heads off a code path that we need to exercise.
3233 */
3234 CharData_Init(&storage);
3235 XML_SetUserData(g_parser, &storage);
3236 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3237 == XML_STATUS_ERROR)
3238 xml_failure(g_parser);
3239 /* The default handler doesn't translate the entities */
3240 CharData_CheckXMLChars(&storage, expected);
3241
3242 /* Now try again and check the translation */
3243 XML_ParserReset(g_parser, NULL);
3244 run_character_check(text, result);
3245 }
3246 END_TEST
3247
3248 /* Regression test that an invalid tag in an external parameter
3249 * reference in an external DTD is correctly faulted.
3250 *
3251 * Only a few specific tags are legal in DTDs ignoring comments and
3252 * processing instructions, all of which begin with an exclamation
3253 * mark. "<el/>" is not one of them, so the parser should raise an
3254 * error on encountering it.
3255 */
START_TEST(test_invalid_tag_in_dtd)3256 START_TEST(test_invalid_tag_in_dtd) {
3257 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3258 "<doc></doc>\n";
3259
3260 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3261 XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3262 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3263 "Invalid tag IN DTD external param not rejected");
3264 }
3265 END_TEST
3266
3267 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3268 START_TEST(test_not_predefined_entities) {
3269 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3270 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3271 int i = 0;
3272
3273 while (text[i] != NULL) {
3274 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3275 "Undefined entity not rejected");
3276 XML_ParserReset(g_parser, NULL);
3277 i++;
3278 }
3279 }
3280 END_TEST
3281
3282 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3283 START_TEST(test_ignore_section) {
3284 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3285 "<doc><e>&entity;</e></doc>";
3286 const XML_Char *expected
3287 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3288 CharData storage;
3289
3290 CharData_Init(&storage);
3291 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3292 XML_SetUserData(g_parser, &storage);
3293 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3294 XML_SetDefaultHandler(g_parser, accumulate_characters);
3295 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3296 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3297 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3298 XML_SetStartElementHandler(g_parser, dummy_start_element);
3299 XML_SetEndElementHandler(g_parser, dummy_end_element);
3300 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3301 == XML_STATUS_ERROR)
3302 xml_failure(g_parser);
3303 CharData_CheckXMLChars(&storage, expected);
3304 }
3305 END_TEST
3306
START_TEST(test_ignore_section_utf16)3307 START_TEST(test_ignore_section_utf16) {
3308 const char text[] =
3309 /* <!DOCTYPE d SYSTEM 's'> */
3310 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3311 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3312 /* <d><e>&en;</e></d> */
3313 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3314 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3315 CharData storage;
3316
3317 CharData_Init(&storage);
3318 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3319 XML_SetUserData(g_parser, &storage);
3320 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3321 XML_SetDefaultHandler(g_parser, accumulate_characters);
3322 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3323 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3324 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3325 XML_SetStartElementHandler(g_parser, dummy_start_element);
3326 XML_SetEndElementHandler(g_parser, dummy_end_element);
3327 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3328 == XML_STATUS_ERROR)
3329 xml_failure(g_parser);
3330 CharData_CheckXMLChars(&storage, expected);
3331 }
3332 END_TEST
3333
START_TEST(test_ignore_section_utf16_be)3334 START_TEST(test_ignore_section_utf16_be) {
3335 const char text[] =
3336 /* <!DOCTYPE d SYSTEM 's'> */
3337 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3338 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3339 /* <d><e>&en;</e></d> */
3340 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3341 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3342 CharData storage;
3343
3344 CharData_Init(&storage);
3345 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3346 XML_SetUserData(g_parser, &storage);
3347 XML_SetExternalEntityRefHandler(g_parser,
3348 external_entity_load_ignore_utf16_be);
3349 XML_SetDefaultHandler(g_parser, accumulate_characters);
3350 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3351 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3352 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3353 XML_SetStartElementHandler(g_parser, dummy_start_element);
3354 XML_SetEndElementHandler(g_parser, dummy_end_element);
3355 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3356 == XML_STATUS_ERROR)
3357 xml_failure(g_parser);
3358 CharData_CheckXMLChars(&storage, expected);
3359 }
3360 END_TEST
3361
3362 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3363 START_TEST(test_bad_ignore_section) {
3364 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3365 "<doc><e>&entity;</e></doc>";
3366 ExtFaults faults[]
3367 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3368 XML_ERROR_SYNTAX},
3369 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3370 XML_ERROR_INVALID_TOKEN},
3371 {/* FIrst two bytes of a three-byte char */
3372 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3373 XML_ERROR_PARTIAL_CHAR},
3374 {NULL, NULL, NULL, XML_ERROR_NONE}};
3375 ExtFaults *fault;
3376
3377 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3378 set_subtest("%s", fault->parse_text);
3379 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3380 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3381 XML_SetUserData(g_parser, fault);
3382 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3383 "Incomplete IGNORE section not failed");
3384 XML_ParserReset(g_parser, NULL);
3385 }
3386 }
3387 END_TEST
3388
/* Per-subtest state shared between test_external_bom_consumed and its
 * external entity handler, external_bom_checker, via the parser's user
 * data.
 */
struct bom_testdata {
  /* Text that external_bom_checker parses for system ID "004-2.ent" */
  const char *external;
  /* Number of leading bytes of `external` fed in a first, non-final parse
   * call; the rest is fed in a second, final call.
   */
  int split;
  /* Set to XML_TRUE by external_bom_checker when it is invoked for
   * "004-2.ent"; the test fails if it is still XML_FALSE afterwards.
   */
  XML_Bool nested_callback_happened;
};
3394
3395 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3396 external_bom_checker(XML_Parser parser, const XML_Char *context,
3397 const XML_Char *base, const XML_Char *systemId,
3398 const XML_Char *publicId) {
3399 const char *text;
3400 UNUSED_P(base);
3401 UNUSED_P(systemId);
3402 UNUSED_P(publicId);
3403
3404 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3405 if (ext_parser == NULL)
3406 fail("Could not create external entity parser");
3407
3408 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3409 struct bom_testdata *const testdata
3410 = (struct bom_testdata *)XML_GetUserData(parser);
3411 const char *const external = testdata->external;
3412 const int split = testdata->split;
3413 testdata->nested_callback_happened = XML_TRUE;
3414
3415 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3416 != XML_STATUS_OK) {
3417 xml_failure(ext_parser);
3418 }
3419 text = external + split; // the parse below will continue where we left off.
3420 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3421 text = "<!ELEMENT doc EMPTY>\n"
3422 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3423 "<!ENTITY % e2 '%e1;'>\n";
3424 } else {
3425 fail("unknown systemId");
3426 }
3427
3428 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3429 != XML_STATUS_OK)
3430 xml_failure(ext_parser);
3431
3432 XML_ParserFree(ext_parser);
3433 return XML_STATUS_OK;
3434 }
3435
3436 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3437 START_TEST(test_external_bom_consumed) {
3438 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3439 "<doc></doc>\n";
3440 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3441 const int len = (int)strlen(external);
3442 for (int split = 0; split <= len; ++split) {
3443 set_subtest("split at byte %d", split);
3444
3445 struct bom_testdata testdata;
3446 testdata.external = external;
3447 testdata.split = split;
3448 testdata.nested_callback_happened = XML_FALSE;
3449
3450 XML_Parser parser = XML_ParserCreate(NULL);
3451 if (parser == NULL) {
3452 fail("Couldn't create parser");
3453 }
3454 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3455 XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3456 XML_SetUserData(parser, &testdata);
3457 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3458 == XML_STATUS_ERROR)
3459 xml_failure(parser);
3460 if (! testdata.nested_callback_happened) {
3461 fail("ref handler not called");
3462 }
3463 XML_ParserFree(parser);
3464 }
3465 }
3466 END_TEST
3467
3468 /* Test recursive parsing */
START_TEST(test_external_entity_values)3469 START_TEST(test_external_entity_values) {
3470 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3471 "<doc></doc>\n";
3472 ExtFaults data_004_2[] = {
3473 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3474 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3475 XML_ERROR_INVALID_TOKEN},
3476 {"'wombat", "Unterminated string not faulted", NULL,
3477 XML_ERROR_UNCLOSED_TOKEN},
3478 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3479 XML_ERROR_PARTIAL_CHAR},
3480 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3481 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3482 XML_ERROR_XML_DECL},
3483 {/* UTF-8 BOM */
3484 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3485 XML_ERROR_NONE},
3486 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3487 "Invalid token after text declaration not faulted", NULL,
3488 XML_ERROR_INVALID_TOKEN},
3489 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3490 "Unterminated string after text decl not faulted", NULL,
3491 XML_ERROR_UNCLOSED_TOKEN},
3492 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3493 "Partial UTF-8 character after text decl not faulted", NULL,
3494 XML_ERROR_PARTIAL_CHAR},
3495 {"%e1;", "Recursive parameter entity not faulted", NULL,
3496 XML_ERROR_RECURSIVE_ENTITY_REF},
3497 {NULL, NULL, NULL, XML_ERROR_NONE}};
3498 int i;
3499
3500 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3501 set_subtest("%s", data_004_2[i].parse_text);
3502 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3503 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3504 XML_SetUserData(g_parser, &data_004_2[i]);
3505 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3506 == XML_STATUS_ERROR)
3507 xml_failure(g_parser);
3508 XML_ParserReset(g_parser, NULL);
3509 }
3510 }
3511 END_TEST
3512
3513 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3514 START_TEST(test_ext_entity_not_standalone) {
3515 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3516 "<doc></doc>";
3517
3518 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3519 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3520 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3521 "Standalone rejection not caught");
3522 }
3523 END_TEST
3524
START_TEST(test_ext_entity_value_abort)3525 START_TEST(test_ext_entity_value_abort) {
3526 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3527 "<doc></doc>\n";
3528
3529 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3530 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3531 g_resumable = XML_FALSE;
3532 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3533 == XML_STATUS_ERROR)
3534 xml_failure(g_parser);
3535 }
3536 END_TEST
3537
START_TEST(test_bad_public_doctype)3538 START_TEST(test_bad_public_doctype) {
3539 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3540 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3541 "<doc></doc>";
3542
3543 /* Setting a handler provokes a particular code path */
3544 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3545 dummy_end_doctype_handler);
3546 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3547 }
3548 END_TEST
3549
3550 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3551 START_TEST(test_attribute_enum_value) {
3552 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3553 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3554 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3555 ExtTest dtd_data
3556 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3557 "<!ELEMENT a EMPTY>\n"
3558 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3559 NULL, NULL};
3560 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3561
3562 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3563 XML_SetUserData(g_parser, &dtd_data);
3564 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3565 /* An attribute list handler provokes a different code path */
3566 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3567 run_ext_character_check(text, &dtd_data, expected);
3568 }
3569 END_TEST
3570
3571 /* Slightly bizarrely, the library seems to silently ignore entity
3572 * definitions for predefined entities, even when they are wrong. The
3573 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3574 * to happen, so this is currently treated as acceptable.
3575 */
START_TEST(test_predefined_entity_redefinition)3576 START_TEST(test_predefined_entity_redefinition) {
3577 const char *text = "<!DOCTYPE doc [\n"
3578 "<!ENTITY apos 'foo'>\n"
3579 "]>\n"
3580 "<doc>'</doc>";
3581 run_character_check(text, XCS("'"));
3582 }
3583 END_TEST
3584
3585 /* Test that the parser stops processing the DTD after an unresolved
3586 * parameter entity is encountered.
3587 */
START_TEST(test_dtd_stop_processing)3588 START_TEST(test_dtd_stop_processing) {
3589 const char *text = "<!DOCTYPE doc [\n"
3590 "%foo;\n"
3591 "<!ENTITY bar 'bas'>\n"
3592 "]><doc/>";
3593
3594 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3595 init_dummy_handlers();
3596 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3597 == XML_STATUS_ERROR)
3598 xml_failure(g_parser);
3599 if (get_dummy_handler_flags() != 0)
3600 fail("DTD processing still going after undefined PE");
3601 }
3602 END_TEST
3603
3604 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3605 START_TEST(test_public_notation_no_sysid) {
3606 const char *text = "<!DOCTYPE doc [\n"
3607 "<!NOTATION note PUBLIC 'foo'>\n"
3608 "<!ELEMENT doc EMPTY>\n"
3609 "]>\n<doc/>";
3610
3611 init_dummy_handlers();
3612 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3613 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3614 == XML_STATUS_ERROR)
3615 xml_failure(g_parser);
3616 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3617 fail("Notation declaration handler not called");
3618 }
3619 END_TEST
3620
START_TEST(test_nested_groups)3621 START_TEST(test_nested_groups) {
3622 const char *text
3623 = "<!DOCTYPE doc [\n"
3624 "<!ELEMENT doc "
3625 /* Sixteen elements per line */
3626 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3627 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3628 "))))))))))))))))))))))))))))))))>\n"
3629 "<!ELEMENT e EMPTY>"
3630 "]>\n"
3631 "<doc><e/></doc>";
3632 CharData storage;
3633
3634 CharData_Init(&storage);
3635 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3636 XML_SetStartElementHandler(g_parser, record_element_start_handler);
3637 XML_SetUserData(g_parser, &storage);
3638 init_dummy_handlers();
3639 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3640 == XML_STATUS_ERROR)
3641 xml_failure(g_parser);
3642 CharData_CheckXMLChars(&storage, XCS("doce"));
3643 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3644 fail("Element handler not fired");
3645 }
3646 END_TEST
3647
START_TEST(test_group_choice)3648 START_TEST(test_group_choice) {
3649 const char *text = "<!DOCTYPE doc [\n"
3650 "<!ELEMENT doc (a|b|c)+>\n"
3651 "<!ELEMENT a EMPTY>\n"
3652 "<!ELEMENT b (#PCDATA)>\n"
3653 "<!ELEMENT c ANY>\n"
3654 "]>\n"
3655 "<doc>\n"
3656 "<a/>\n"
3657 "<b attr='foo'>This is a foo</b>\n"
3658 "<c></c>\n"
3659 "</doc>\n";
3660
3661 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3662 init_dummy_handlers();
3663 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3664 == XML_STATUS_ERROR)
3665 xml_failure(g_parser);
3666 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3667 fail("Element handler flag not raised");
3668 }
3669 END_TEST
3670
START_TEST(test_standalone_parameter_entity)3671 START_TEST(test_standalone_parameter_entity) {
3672 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3673 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3674 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3675 "%entity;\n"
3676 "]>\n"
3677 "<doc></doc>";
3678 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3679
3680 XML_SetUserData(g_parser, dtd_data);
3681 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3682 XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3683 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3684 == XML_STATUS_ERROR)
3685 xml_failure(g_parser);
3686 }
3687 END_TEST
3688
3689 /* Test skipping of parameter entity in an external DTD */
3690 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3691 START_TEST(test_skipped_parameter_entity) {
3692 const char *text = "<?xml version='1.0'?>\n"
3693 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3694 "<!ELEMENT root (#PCDATA|a)* >\n"
3695 "]>\n"
3696 "<root></root>";
3697 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3698
3699 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3700 XML_SetUserData(g_parser, &dtd_data);
3701 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3702 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3703 init_dummy_handlers();
3704 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3705 == XML_STATUS_ERROR)
3706 xml_failure(g_parser);
3707 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3708 fail("Skip handler not executed");
3709 }
3710 END_TEST
3711
3712 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3713 START_TEST(test_recursive_external_parameter_entity) {
3714 const char *text = "<?xml version='1.0'?>\n"
3715 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3716 "<!ELEMENT root (#PCDATA|a)* >\n"
3717 "]>\n"
3718 "<root></root>";
3719 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;",
3720 "Recursive external parameter entity not faulted", NULL,
3721 XML_ERROR_RECURSIVE_ENTITY_REF};
3722
3723 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3724 XML_SetUserData(g_parser, &dtd_data);
3725 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3726 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3727 "Recursive external parameter not spotted");
3728 }
3729 END_TEST
3730
3731 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3732 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3733 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3734 "<doc></doc>\n";
3735
3736 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3737 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3738 XML_SetUserData(g_parser, NULL);
3739 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3740 == XML_STATUS_ERROR)
3741 xml_failure(g_parser);
3742
3743 /* Now repeat without the external entity ref handler invoking
3744 * another copy of itself.
3745 */
3746 XML_ParserReset(g_parser, NULL);
3747 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3748 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3749 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3750 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3751 == XML_STATUS_ERROR)
3752 xml_failure(g_parser);
3753 }
3754 END_TEST
3755
3756 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3757 START_TEST(test_suspend_xdecl) {
3758 const char *text = long_character_data_text;
3759
3760 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3761 XML_SetUserData(g_parser, g_parser);
3762 g_resumable = XML_TRUE;
3763 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3764 // we won't know exactly how much input we actually managed to give Expat.
3765 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3766 != XML_STATUS_SUSPENDED)
3767 xml_failure(g_parser);
3768 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3769 xml_failure(g_parser);
3770 /* Attempt to start a new parse while suspended */
3771 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3772 != XML_STATUS_ERROR)
3773 fail("Attempt to parse while suspended not faulted");
3774 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3775 fail("Suspended parse not faulted with correct error");
3776 }
3777 END_TEST
3778
3779 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3780 START_TEST(test_abort_epilog) {
3781 const char *text = "<doc></doc>\n\r\n";
3782 XML_Char trigger_char = XCS('\r');
3783
3784 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3785 XML_SetUserData(g_parser, &trigger_char);
3786 g_resumable = XML_FALSE;
3787 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3788 != XML_STATUS_ERROR)
3789 fail("Abort not triggered");
3790 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3791 xml_failure(g_parser);
3792 }
3793 END_TEST
3794
3795 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3796 START_TEST(test_abort_epilog_2) {
3797 const char *text = "<doc></doc>\n";
3798 XML_Char trigger_char = XCS('\n');
3799
3800 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3801 XML_SetUserData(g_parser, &trigger_char);
3802 g_resumable = XML_FALSE;
3803 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3804 }
3805 END_TEST
3806
3807 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3808 START_TEST(test_suspend_epilog) {
3809 const char *text = "<doc></doc>\n";
3810 XML_Char trigger_char = XCS('\n');
3811
3812 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3813 XML_SetUserData(g_parser, &trigger_char);
3814 g_resumable = XML_TRUE;
3815 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3816 != XML_STATUS_SUSPENDED)
3817 xml_failure(g_parser);
3818 }
3819 END_TEST
3820
START_TEST(test_suspend_in_sole_empty_tag)3821 START_TEST(test_suspend_in_sole_empty_tag) {
3822 const char *text = "<doc/>";
3823 enum XML_Status rc;
3824
3825 XML_SetEndElementHandler(g_parser, suspending_end_handler);
3826 XML_SetUserData(g_parser, g_parser);
3827 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3828 if (rc == XML_STATUS_ERROR)
3829 xml_failure(g_parser);
3830 else if (rc != XML_STATUS_SUSPENDED)
3831 fail("Suspend not triggered");
3832 rc = XML_ResumeParser(g_parser);
3833 if (rc == XML_STATUS_ERROR)
3834 xml_failure(g_parser);
3835 else if (rc != XML_STATUS_OK)
3836 fail("Resume failed");
3837 }
3838 END_TEST
3839
START_TEST(test_unfinished_epilog)3840 START_TEST(test_unfinished_epilog) {
3841 const char *text = "<doc></doc><";
3842
3843 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3844 "Incomplete epilog entry not faulted");
3845 }
3846 END_TEST
3847
START_TEST(test_partial_char_in_epilog)3848 START_TEST(test_partial_char_in_epilog) {
3849 const char *text = "<doc></doc>\xe2\x82";
3850
3851 /* First check that no fault is raised if the parse is not finished */
3852 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3853 == XML_STATUS_ERROR)
3854 xml_failure(g_parser);
3855 /* Now check that it is faulted once we finish */
3856 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3857 fail("Partial character in epilog not faulted");
3858 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3859 xml_failure(g_parser);
3860 }
3861 END_TEST
3862
3863 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3864 START_TEST(test_suspend_resume_internal_entity) {
3865 const char *text
3866 = "<!DOCTYPE doc [\n"
3867 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3868 "]>\n"
3869 "<doc>&foo;</doc>\n";
3870 const XML_Char *expected1 = XCS("Hi");
3871 const XML_Char *expected2 = XCS("HiHo");
3872 CharData storage;
3873
3874 CharData_Init(&storage);
3875 XML_SetStartElementHandler(g_parser, start_element_suspender);
3876 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3877 XML_SetUserData(g_parser, &storage);
3878 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3879 // we won't know exactly how much input we actually managed to give Expat.
3880 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3881 != XML_STATUS_SUSPENDED)
3882 xml_failure(g_parser);
3883 CharData_CheckXMLChars(&storage, XCS(""));
3884 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3885 xml_failure(g_parser);
3886 CharData_CheckXMLChars(&storage, expected1);
3887 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3888 xml_failure(g_parser);
3889 CharData_CheckXMLChars(&storage, expected2);
3890 }
3891 END_TEST
3892
START_TEST(test_suspend_resume_internal_entity_issue_629)3893 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3894 const char *const text
3895 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3896 "<"
3897 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3898 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3899 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3900 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3901 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3902 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3903 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3904 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3905 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3906 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3907 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3908 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3909 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3910 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3911 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3912 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3913 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3914 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3915 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3916 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3917 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3918 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3919 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3920 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3921 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3922 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3923 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3924 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3925 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3926 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3927 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3928 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3929 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3930 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3931 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3932 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3933 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3934 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3935 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3936 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3937 "/>"
3938 "</b></a>";
3939 const size_t firstChunkSizeBytes = 54;
3940
3941 XML_Parser parser = XML_ParserCreate(NULL);
3942 XML_SetUserData(parser, parser);
3943 XML_SetCommentHandler(parser, suspending_comment_handler);
3944
3945 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3946 != XML_STATUS_SUSPENDED)
3947 xml_failure(parser);
3948 if (XML_ResumeParser(parser) != XML_STATUS_OK)
3949 xml_failure(parser);
3950 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3951 (int)(strlen(text) - firstChunkSizeBytes),
3952 XML_TRUE)
3953 != XML_STATUS_OK)
3954 xml_failure(parser);
3955 XML_ParserFree(parser);
3956 }
3957 END_TEST
3958
3959 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3960 START_TEST(test_resume_entity_with_syntax_error) {
3961 if (g_chunkSize != 0) {
3962 // this test does not use SINGLE_BYTES, because of suspension
3963 return;
3964 }
3965
3966 const char *text = "<!DOCTYPE doc [\n"
3967 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3968 "]>\n"
3969 "<doc>&foo;</doc>\n";
3970
3971 XML_SetStartElementHandler(g_parser, start_element_suspender);
3972 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3973 // we won't know exactly how much input we actually managed to give Expat.
3974 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3975 != XML_STATUS_SUSPENDED)
3976 xml_failure(g_parser);
3977 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3978 fail("Syntax error in entity not faulted");
3979 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3980 xml_failure(g_parser);
3981 }
3982 END_TEST
3983
3984 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3985 START_TEST(test_suspend_resume_parameter_entity) {
3986 const char *text = "<!DOCTYPE doc [\n"
3987 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3988 "%foo;\n"
3989 "]>\n"
3990 "<doc>Hello, world</doc>";
3991 const XML_Char *expected = XCS("Hello, world");
3992 CharData storage;
3993
3994 CharData_Init(&storage);
3995 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3996 XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3997 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3998 XML_SetUserData(g_parser, &storage);
3999 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4000 != XML_STATUS_SUSPENDED)
4001 xml_failure(g_parser);
4002 CharData_CheckXMLChars(&storage, XCS(""));
4003 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4004 xml_failure(g_parser);
4005 CharData_CheckXMLChars(&storage, expected);
4006 }
4007 END_TEST
4008
4009 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)4010 START_TEST(test_restart_on_error) {
4011 const char *text = "<$doc><doc></doc>";
4012
4013 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4014 != XML_STATUS_ERROR)
4015 fail("Invalid tag name not faulted");
4016 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4017 xml_failure(g_parser);
4018 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
4019 fail("Restarting invalid parse not faulted");
4020 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4021 xml_failure(g_parser);
4022 }
4023 END_TEST
4024
4025 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)4026 START_TEST(test_reject_lt_in_attribute_value) {
4027 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
4028 "<doc></doc>";
4029
4030 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4031 "Bad attribute default not faulted");
4032 }
4033 END_TEST
4034
START_TEST(test_reject_unfinished_param_in_att_value)4035 START_TEST(test_reject_unfinished_param_in_att_value) {
4036 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
4037 "<doc></doc>";
4038
4039 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4040 "Bad attribute default not faulted");
4041 }
4042 END_TEST
4043
START_TEST(test_trailing_cr_in_att_value)4044 START_TEST(test_trailing_cr_in_att_value) {
4045 const char *text = "<doc a='value\r'/>";
4046
4047 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4048 == XML_STATUS_ERROR)
4049 xml_failure(g_parser);
4050 }
4051 END_TEST
4052
4053 /* Try parsing a general entity within a parameter entity in a
4054 * standalone internal DTD. Covers a corner case in the parser.
4055 */
START_TEST(test_standalone_internal_entity)4056 START_TEST(test_standalone_internal_entity) {
4057 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
4058 "<!DOCTYPE doc [\n"
4059 " <!ELEMENT doc (#PCDATA)>\n"
4060 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n"
4061 " <!ENTITY ge 'AttDefaultValue'>\n"
4062 " %pe;\n"
4063 "]>\n"
4064 "<doc att2='any'/>";
4065
4066 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4067 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4068 == XML_STATUS_ERROR)
4069 xml_failure(g_parser);
4070 }
4071 END_TEST
4072
4073 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)4074 START_TEST(test_skipped_external_entity) {
4075 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4076 "<doc></doc>\n";
4077 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
4078 "<!ENTITY % e2 '%e1;'>\n",
4079 NULL, NULL};
4080
4081 XML_SetUserData(g_parser, &test_data);
4082 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4083 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4084 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4085 == XML_STATUS_ERROR)
4086 xml_failure(g_parser);
4087 }
4088 END_TEST
4089
4090 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)4091 START_TEST(test_skipped_null_loaded_ext_entity) {
4092 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4093 "<doc />";
4094 ExtHdlrData test_data
4095 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4096 "<!ENTITY % pe2 '%pe1;'>\n"
4097 "%pe2;\n",
4098 external_entity_null_loader, NULL};
4099
4100 XML_SetUserData(g_parser, &test_data);
4101 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4102 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4103 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4104 == XML_STATUS_ERROR)
4105 xml_failure(g_parser);
4106 }
4107 END_TEST
4108
START_TEST(test_skipped_unloaded_ext_entity)4109 START_TEST(test_skipped_unloaded_ext_entity) {
4110 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4111 "<doc />";
4112 ExtHdlrData test_data
4113 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4114 "<!ENTITY % pe2 '%pe1;'>\n"
4115 "%pe2;\n",
4116 NULL, NULL};
4117
4118 XML_SetUserData(g_parser, &test_data);
4119 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4120 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4121 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4122 == XML_STATUS_ERROR)
4123 xml_failure(g_parser);
4124 }
4125 END_TEST
4126
4127 /* Test that a parameter entity value ending with a carriage return
4128 * has it translated internally into a newline.
4129 */
START_TEST(test_param_entity_with_trailing_cr)4130 START_TEST(test_param_entity_with_trailing_cr) {
4131 #define PARAM_ENTITY_NAME "pe"
4132 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
4133 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4134 "<doc/>";
4135 ExtTest test_data
4136 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4137 "%" PARAM_ENTITY_NAME ";\n",
4138 NULL, NULL};
4139
4140 XML_SetUserData(g_parser, &test_data);
4141 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4142 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4143 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4144 param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4145 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4146 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4147 == XML_STATUS_ERROR)
4148 xml_failure(g_parser);
4149 int entity_match_flag = get_param_entity_match_flag();
4150 if (entity_match_flag == ENTITY_MATCH_FAIL)
4151 fail("Parameter entity CR->NEWLINE conversion failed");
4152 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4153 fail("Parameter entity not parsed");
4154 }
4155 #undef PARAM_ENTITY_NAME
4156 #undef PARAM_ENTITY_CORE_VALUE
4157 END_TEST
4158
START_TEST(test_invalid_character_entity)4159 START_TEST(test_invalid_character_entity) {
4160 const char *text = "<!DOCTYPE doc [\n"
4161 " <!ENTITY entity '�'>\n"
4162 "]>\n"
4163 "<doc>&entity;</doc>";
4164
4165 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4166 "Out of range character reference not faulted");
4167 }
4168 END_TEST
4169
START_TEST(test_invalid_character_entity_2)4170 START_TEST(test_invalid_character_entity_2) {
4171 const char *text = "<!DOCTYPE doc [\n"
4172 " <!ENTITY entity '&#xg0;'>\n"
4173 "]>\n"
4174 "<doc>&entity;</doc>";
4175
4176 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4177 "Out of range character reference not faulted");
4178 }
4179 END_TEST
4180
START_TEST(test_invalid_character_entity_3)4181 START_TEST(test_invalid_character_entity_3) {
4182 const char text[] =
4183 /* <!DOCTYPE doc [\n */
4184 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4185 /* U+0E04 = KHO KHWAI
4186 * U+0E08 = CHO CHAN */
4187 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4188 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4189 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4190 /* ]>\n */
4191 "\0]\0>\0\n"
4192 /* <doc>&entity;</doc> */
4193 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4194
4195 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4196 != XML_STATUS_ERROR)
4197 fail("Invalid start of entity name not faulted");
4198 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4199 xml_failure(g_parser);
4200 }
4201 END_TEST
4202
START_TEST(test_invalid_character_entity_4)4203 START_TEST(test_invalid_character_entity_4) {
4204 const char *text = "<!DOCTYPE doc [\n"
4205 " <!ENTITY entity '�'>\n" /* = � */
4206 "]>\n"
4207 "<doc>&entity;</doc>";
4208
4209 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4210 "Out of range character reference not faulted");
4211 }
4212 END_TEST
4213
4214 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4215 START_TEST(test_pi_handled_in_default) {
4216 const char *text = "<?test processing instruction?>\n<doc/>";
4217 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4218 CharData storage;
4219
4220 CharData_Init(&storage);
4221 XML_SetDefaultHandler(g_parser, accumulate_characters);
4222 XML_SetUserData(g_parser, &storage);
4223 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4224 == XML_STATUS_ERROR)
4225 xml_failure(g_parser);
4226 CharData_CheckXMLChars(&storage, expected);
4227 }
4228 END_TEST
4229
4230 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4231 START_TEST(test_comment_handled_in_default) {
4232 const char *text = "<!-- This is a comment -->\n<doc/>";
4233 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4234 CharData storage;
4235
4236 CharData_Init(&storage);
4237 XML_SetDefaultHandler(g_parser, accumulate_characters);
4238 XML_SetUserData(g_parser, &storage);
4239 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4240 == XML_STATUS_ERROR)
4241 xml_failure(g_parser);
4242 CharData_CheckXMLChars(&storage, expected);
4243 }
4244 END_TEST
4245
4246 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4247 START_TEST(test_pi_yml) {
4248 const char *text = "<?yml something like data?><doc/>";
4249 const XML_Char *expected = XCS("yml: something like data\n");
4250 CharData storage;
4251
4252 CharData_Init(&storage);
4253 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4254 XML_SetUserData(g_parser, &storage);
4255 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4256 == XML_STATUS_ERROR)
4257 xml_failure(g_parser);
4258 CharData_CheckXMLChars(&storage, expected);
4259 }
4260 END_TEST
4261
START_TEST(test_pi_xnl)4262 START_TEST(test_pi_xnl) {
4263 const char *text = "<?xnl nothing like data?><doc/>";
4264 const XML_Char *expected = XCS("xnl: nothing like data\n");
4265 CharData storage;
4266
4267 CharData_Init(&storage);
4268 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4269 XML_SetUserData(g_parser, &storage);
4270 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4271 == XML_STATUS_ERROR)
4272 xml_failure(g_parser);
4273 CharData_CheckXMLChars(&storage, expected);
4274 }
4275 END_TEST
4276
START_TEST(test_pi_xmm)4277 START_TEST(test_pi_xmm) {
4278 const char *text = "<?xmm everything like data?><doc/>";
4279 const XML_Char *expected = XCS("xmm: everything like data\n");
4280 CharData storage;
4281
4282 CharData_Init(&storage);
4283 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4284 XML_SetUserData(g_parser, &storage);
4285 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4286 == XML_STATUS_ERROR)
4287 xml_failure(g_parser);
4288 CharData_CheckXMLChars(&storage, expected);
4289 }
4290 END_TEST
4291
START_TEST(test_utf16_pi)4292 START_TEST(test_utf16_pi) {
4293 const char text[] =
4294 /* <?{KHO KHWAI}{CHO CHAN}?>
4295 * where {KHO KHWAI} = U+0E04
4296 * and {CHO CHAN} = U+0E08
4297 */
4298 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4299 /* <q/> */
4300 "<\0q\0/\0>\0";
4301 #ifdef XML_UNICODE
4302 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4303 #else
4304 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4305 #endif
4306 CharData storage;
4307
4308 CharData_Init(&storage);
4309 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4310 XML_SetUserData(g_parser, &storage);
4311 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4312 == XML_STATUS_ERROR)
4313 xml_failure(g_parser);
4314 CharData_CheckXMLChars(&storage, expected);
4315 }
4316 END_TEST
4317
START_TEST(test_utf16_be_pi)4318 START_TEST(test_utf16_be_pi) {
4319 const char text[] =
4320 /* <?{KHO KHWAI}{CHO CHAN}?>
4321 * where {KHO KHWAI} = U+0E04
4322 * and {CHO CHAN} = U+0E08
4323 */
4324 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4325 /* <q/> */
4326 "\0<\0q\0/\0>";
4327 #ifdef XML_UNICODE
4328 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4329 #else
4330 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4331 #endif
4332 CharData storage;
4333
4334 CharData_Init(&storage);
4335 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4336 XML_SetUserData(g_parser, &storage);
4337 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4338 == XML_STATUS_ERROR)
4339 xml_failure(g_parser);
4340 CharData_CheckXMLChars(&storage, expected);
4341 }
4342 END_TEST
4343
4344 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4345 START_TEST(test_utf16_be_comment) {
4346 const char text[] =
4347 /* <!-- Comment A --> */
4348 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4349 /* <doc/> */
4350 "\0<\0d\0o\0c\0/\0>";
4351 const XML_Char *expected = XCS(" Comment A ");
4352 CharData storage;
4353
4354 CharData_Init(&storage);
4355 XML_SetCommentHandler(g_parser, accumulate_comment);
4356 XML_SetUserData(g_parser, &storage);
4357 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4358 == XML_STATUS_ERROR)
4359 xml_failure(g_parser);
4360 CharData_CheckXMLChars(&storage, expected);
4361 }
4362 END_TEST
4363
START_TEST(test_utf16_le_comment)4364 START_TEST(test_utf16_le_comment) {
4365 const char text[] =
4366 /* <!-- Comment B --> */
4367 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4368 /* <doc/> */
4369 "<\0d\0o\0c\0/\0>\0";
4370 const XML_Char *expected = XCS(" Comment B ");
4371 CharData storage;
4372
4373 CharData_Init(&storage);
4374 XML_SetCommentHandler(g_parser, accumulate_comment);
4375 XML_SetUserData(g_parser, &storage);
4376 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4377 == XML_STATUS_ERROR)
4378 xml_failure(g_parser);
4379 CharData_CheckXMLChars(&storage, expected);
4380 }
4381 END_TEST
4382
4383 /* Test that the unknown encoding handler with map entries that expect
4384 * conversion but no conversion function is faulted
4385 */
START_TEST(test_missing_encoding_conversion_fn)4386 START_TEST(test_missing_encoding_conversion_fn) {
4387 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4388 "<doc>\x81</doc>";
4389
4390 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4391 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4392 * character introducing a two-byte sequence. For this, it
4393 * requires a convert function. The above function call doesn't
4394 * pass one through, so when BadEncodingHandler actually gets
4395 * called it should supply an invalid encoding.
4396 */
4397 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4398 "Encoding with missing convert() not faulted");
4399 }
4400 END_TEST
4401
START_TEST(test_failing_encoding_conversion_fn)4402 START_TEST(test_failing_encoding_conversion_fn) {
4403 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4404 "<doc>\x81</doc>";
4405
4406 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4407 /* BadEncodingHandler sets up an encoding with every top-bit-set
4408 * character introducing a two-byte sequence. For this, it
4409 * requires a convert function. The above function call passes
4410 * one that insists all possible sequences are invalid anyway.
4411 */
4412 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4413 "Encoding with failing convert() not faulted");
4414 }
4415 END_TEST
4416
4417 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4418 START_TEST(test_unknown_encoding_success) {
4419 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4420 /* Equivalent to <eoc>Hello, world</eoc> */
4421 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4422
4423 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4424 run_character_check(text, XCS("Hello, world"));
4425 }
4426 END_TEST
4427
4428 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4429 START_TEST(test_unknown_encoding_bad_name) {
4430 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4431 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4432
4433 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4434 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4435 "Bad name start in unknown encoding not faulted");
4436 }
4437 END_TEST
4438
4439 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4440 START_TEST(test_unknown_encoding_bad_name_2) {
4441 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4442 "<d\xffoc>Hello, world</d\xffoc>";
4443
4444 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4445 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4446 "Bad name in unknown encoding not faulted");
4447 }
4448 END_TEST
4449
4450 /* Test element name that is long enough to fill the conversion buffer
4451 * in an unknown encoding, finishing with an encoded character.
4452 */
START_TEST(test_unknown_encoding_long_name_1)4453 START_TEST(test_unknown_encoding_long_name_1) {
4454 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4455 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4456 "Hi"
4457 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4458 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4459 CharData storage;
4460
4461 CharData_Init(&storage);
4462 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4463 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4464 XML_SetUserData(g_parser, &storage);
4465 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4466 == XML_STATUS_ERROR)
4467 xml_failure(g_parser);
4468 CharData_CheckXMLChars(&storage, expected);
4469 }
4470 END_TEST
4471
4472 /* Test element name that is long enough to fill the conversion buffer
4473 * in an unknown encoding, finishing with an simple character.
4474 */
START_TEST(test_unknown_encoding_long_name_2)4475 START_TEST(test_unknown_encoding_long_name_2) {
4476 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4477 "<abcdefghabcdefghabcdefghijklmnop>"
4478 "Hi"
4479 "</abcdefghabcdefghabcdefghijklmnop>";
4480 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4481 CharData storage;
4482
4483 CharData_Init(&storage);
4484 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4485 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4486 XML_SetUserData(g_parser, &storage);
4487 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4488 == XML_STATUS_ERROR)
4489 xml_failure(g_parser);
4490 CharData_CheckXMLChars(&storage, expected);
4491 }
4492 END_TEST
4493
START_TEST(test_invalid_unknown_encoding)4494 START_TEST(test_invalid_unknown_encoding) {
4495 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4496 "<doc>Hello world</doc>";
4497
4498 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4499 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4500 "Invalid unknown encoding not faulted");
4501 }
4502 END_TEST
4503
START_TEST(test_unknown_ascii_encoding_ok)4504 START_TEST(test_unknown_ascii_encoding_ok) {
4505 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4506 "<doc>Hello, world</doc>";
4507
4508 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4509 run_character_check(text, XCS("Hello, world"));
4510 }
4511 END_TEST
4512
START_TEST(test_unknown_ascii_encoding_fail)4513 START_TEST(test_unknown_ascii_encoding_fail) {
4514 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4515 "<doc>Hello, \x80 world</doc>";
4516
4517 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4518 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4519 "Invalid character not faulted");
4520 }
4521 END_TEST
4522
START_TEST(test_unknown_encoding_invalid_length)4523 START_TEST(test_unknown_encoding_invalid_length) {
4524 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4525 "<doc>Hello, world</doc>";
4526
4527 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4528 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4529 "Invalid unknown encoding not faulted");
4530 }
4531 END_TEST
4532
START_TEST(test_unknown_encoding_invalid_topbit)4533 START_TEST(test_unknown_encoding_invalid_topbit) {
4534 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4535 "<doc>Hello, world</doc>";
4536
4537 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4538 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4539 "Invalid unknown encoding not faulted");
4540 }
4541 END_TEST
4542
START_TEST(test_unknown_encoding_invalid_surrogate)4543 START_TEST(test_unknown_encoding_invalid_surrogate) {
4544 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4545 "<doc>Hello, \x82 world</doc>";
4546
4547 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4548 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4549 "Invalid unknown encoding not faulted");
4550 }
4551 END_TEST
4552
START_TEST(test_unknown_encoding_invalid_high)4553 START_TEST(test_unknown_encoding_invalid_high) {
4554 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4555 "<doc>Hello, world</doc>";
4556
4557 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4558 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4559 "Invalid unknown encoding not faulted");
4560 }
4561 END_TEST
4562
START_TEST(test_unknown_encoding_invalid_attr_value)4563 START_TEST(test_unknown_encoding_invalid_attr_value) {
4564 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4565 "<doc attr='\xff\x30'/>";
4566
4567 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4568 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4569 "Invalid attribute valid not faulted");
4570 }
4571 END_TEST
4572
4573 /* Test an external entity parser set to use latin-1 detects UTF-16
4574 * BOMs correctly.
4575 */
4576 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4577 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4578 const char *text = "<!DOCTYPE doc [\n"
4579 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4580 "]>\n"
4581 "<doc>&en;</doc>";
4582 ExtTest2 test_data
4583 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4584 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4585 * 0x4c = L and 0x20 is a space
4586 */
4587 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4588 #ifdef XML_UNICODE
4589 const XML_Char *expected = XCS("\x00ff\x00feL ");
4590 #else
4591 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4592 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4593 #endif
4594 CharData storage;
4595
4596 CharData_Init(&storage);
4597 test_data.storage = &storage;
4598 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4599 XML_SetUserData(g_parser, &test_data);
4600 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4601 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4602 == XML_STATUS_ERROR)
4603 xml_failure(g_parser);
4604 CharData_CheckXMLChars(&storage, expected);
4605 }
4606 END_TEST
4607
START_TEST(test_ext_entity_latin1_utf16be_bom)4608 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4609 const char *text = "<!DOCTYPE doc [\n"
4610 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4611 "]>\n"
4612 "<doc>&en;</doc>";
4613 ExtTest2 test_data
4614 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4615 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4616 * 0x4c = L and 0x20 is a space
4617 */
4618 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4619 #ifdef XML_UNICODE
4620 const XML_Char *expected = XCS("\x00fe\x00ff L");
4621 #else
4622 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4623 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4624 #endif
4625 CharData storage;
4626
4627 CharData_Init(&storage);
4628 test_data.storage = &storage;
4629 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4630 XML_SetUserData(g_parser, &test_data);
4631 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4632 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4633 == XML_STATUS_ERROR)
4634 xml_failure(g_parser);
4635 CharData_CheckXMLChars(&storage, expected);
4636 }
4637 END_TEST
4638
4639 /* Parsing the full buffer rather than a byte at a time makes a
4640 * difference to the encoding scanning code, so repeat the above tests
4641 * without breaking them down by byte.
4642 */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4643 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4644 const char *text = "<!DOCTYPE doc [\n"
4645 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4646 "]>\n"
4647 "<doc>&en;</doc>";
4648 ExtTest2 test_data
4649 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4650 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4651 * 0x4c = L and 0x20 is a space
4652 */
4653 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4654 #ifdef XML_UNICODE
4655 const XML_Char *expected = XCS("\x00ff\x00feL ");
4656 #else
4657 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4658 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4659 #endif
4660 CharData storage;
4661
4662 CharData_Init(&storage);
4663 test_data.storage = &storage;
4664 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4665 XML_SetUserData(g_parser, &test_data);
4666 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4667 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4668 == XML_STATUS_ERROR)
4669 xml_failure(g_parser);
4670 CharData_CheckXMLChars(&storage, expected);
4671 }
4672 END_TEST
4673
START_TEST(test_ext_entity_latin1_utf16be_bom2)4674 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4675 const char *text = "<!DOCTYPE doc [\n"
4676 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4677 "]>\n"
4678 "<doc>&en;</doc>";
4679 ExtTest2 test_data
4680 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4681 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4682 * 0x4c = L and 0x20 is a space
4683 */
4684 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4685 #ifdef XML_UNICODE
4686 const XML_Char *expected = XCS("\x00fe\x00ff L");
4687 #else
4688 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4689 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4690 #endif
4691 CharData storage;
4692
4693 CharData_Init(&storage);
4694 test_data.storage = &storage;
4695 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4696 XML_SetUserData(g_parser, &test_data);
4697 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4698 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4699 == XML_STATUS_ERROR)
4700 xml_failure(g_parser);
4701 CharData_CheckXMLChars(&storage, expected);
4702 }
4703 END_TEST
4704
4705 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4706 START_TEST(test_ext_entity_utf16_be) {
4707 const char *text = "<!DOCTYPE doc [\n"
4708 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4709 "]>\n"
4710 "<doc>&en;</doc>";
4711 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4712 #ifdef XML_UNICODE
4713 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4714 #else
4715 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4716 "\xe6\x94\x80" /* U+6500 */
4717 "\xe2\xbc\x80" /* U+2F00 */
4718 "\xe3\xb8\x80"); /* U+3E00 */
4719 #endif
4720 CharData storage;
4721
4722 CharData_Init(&storage);
4723 test_data.storage = &storage;
4724 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4725 XML_SetUserData(g_parser, &test_data);
4726 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4727 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4728 == XML_STATUS_ERROR)
4729 xml_failure(g_parser);
4730 CharData_CheckXMLChars(&storage, expected);
4731 }
4732 END_TEST
4733
4734 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4735 START_TEST(test_ext_entity_utf16_le) {
4736 const char *text = "<!DOCTYPE doc [\n"
4737 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4738 "]>\n"
4739 "<doc>&en;</doc>";
4740 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4741 #ifdef XML_UNICODE
4742 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4743 #else
4744 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4745 "\xe6\x94\x80" /* U+6500 */
4746 "\xe2\xbc\x80" /* U+2F00 */
4747 "\xe3\xb8\x80"); /* U+3E00 */
4748 #endif
4749 CharData storage;
4750
4751 CharData_Init(&storage);
4752 test_data.storage = &storage;
4753 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4754 XML_SetUserData(g_parser, &test_data);
4755 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4756 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4757 == XML_STATUS_ERROR)
4758 xml_failure(g_parser);
4759 CharData_CheckXMLChars(&storage, expected);
4760 }
4761 END_TEST
4762
4763 /* Test little-endian UTF-16 given no explicit encoding.
4764 * The existing default encoding (UTF-8) is assumed to hold without a
4765 * BOM to contradict it, so the entity value will in fact provoke an
4766 * error because 0x00 is not a valid XML character. We parse the
4767 * whole buffer in one go rather than feeding it in byte by byte to
4768 * exercise different code paths in the initial scanning routines.
4769 */
START_TEST(test_ext_entity_utf16_unknown)4770 START_TEST(test_ext_entity_utf16_unknown) {
4771 const char *text = "<!DOCTYPE doc [\n"
4772 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4773 "]>\n"
4774 "<doc>&en;</doc>";
4775 ExtFaults2 test_data
4776 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4777 XML_ERROR_INVALID_TOKEN};
4778
4779 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4780 XML_SetUserData(g_parser, &test_data);
4781 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4782 "Invalid character should not have been accepted");
4783 }
4784 END_TEST
4785
/* Test a near-miss of the UTF-8 BOM: 0xEF 0xBB 0x80 instead of 0xEF 0xBB 0xBF */
START_TEST(test_ext_entity_utf8_non_bom)4787 START_TEST(test_ext_entity_utf8_non_bom) {
4788 const char *text = "<!DOCTYPE doc [\n"
4789 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4790 "]>\n"
4791 "<doc>&en;</doc>";
4792 ExtTest2 test_data
4793 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4794 3, NULL, NULL};
4795 #ifdef XML_UNICODE
4796 const XML_Char *expected = XCS("\xfec0");
4797 #else
4798 const XML_Char *expected = XCS("\xef\xbb\x80");
4799 #endif
4800 CharData storage;
4801
4802 CharData_Init(&storage);
4803 test_data.storage = &storage;
4804 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4805 XML_SetUserData(g_parser, &test_data);
4806 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4807 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4808 == XML_STATUS_ERROR)
4809 xml_failure(g_parser);
4810 CharData_CheckXMLChars(&storage, expected);
4811 }
4812 END_TEST
4813
4814 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4815 START_TEST(test_utf8_in_cdata_section) {
4816 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4817 #ifdef XML_UNICODE
4818 const XML_Char *expected = XCS("one \x00e9 two");
4819 #else
4820 const XML_Char *expected = XCS("one \xc3\xa9 two");
4821 #endif
4822
4823 run_character_check(text, expected);
4824 }
4825 END_TEST
4826
/* Test that UTF-8 around a lone ']' in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section_2)4828 START_TEST(test_utf8_in_cdata_section_2) {
4829 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4830 #ifdef XML_UNICODE
4831 const XML_Char *expected = XCS("\x00e9]\x00e9two");
4832 #else
4833 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4834 #endif
4835
4836 run_character_check(text, expected);
4837 }
4838 END_TEST
4839
START_TEST(test_utf8_in_start_tags)4840 START_TEST(test_utf8_in_start_tags) {
4841 struct test_case {
4842 bool goodName;
4843 bool goodNameStart;
4844 const char *tagName;
4845 };
4846
4847 // The idea with the tests below is this:
4848 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4849 // go to isNever and are hence not a concern.
4850 //
4851 // We start with a character that is a valid name character
4852 // (or even name-start character, see XML 1.0r4 spec) and then we flip
4853 // single bits at places where (1) the result leaves the UTF-8 encoding space
4854 // and (2) we stay in the same n-byte sequence family.
4855 //
4856 // The flipped bits are highlighted in angle brackets in comments,
4857 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4858 // the most significant bit to 1 to leave UTF-8 encoding space.
4859 struct test_case cases[] = {
4860 // 1-byte UTF-8: [0xxx xxxx]
4861 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
4862 {false, false, "\xBA"}, // [<1>011 1010]
4863 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
4864 {false, false, "\xB9"}, // [<1>011 1001]
4865
4866 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4867 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
4868 // Arabic small waw U+06E5
4869 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4870 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4871 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4872 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
4873 // combining char U+0301
4874 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4875 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4876 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4877
4878 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4879 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
4880 // Devanagari Letter A U+0905
4881 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4882 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4883 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4884 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4885 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4886 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
4887 // combining char U+0901
4888 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4889 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4890 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4891 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4892 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4893 };
4894 const bool atNameStart[] = {true, false};
4895
4896 size_t i = 0;
4897 char doc[1024];
4898 size_t failCount = 0;
4899
4900 // we need all the bytes to be parsed, but we don't want the errors that can
4901 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4902 if (g_reparseDeferralEnabledDefault) {
4903 return;
4904 }
4905
4906 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4907 size_t j = 0;
4908 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4909 const bool expectedSuccess
4910 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4911 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4912 cases[i].tagName);
4913 XML_Parser parser = XML_ParserCreate(NULL);
4914
4915 const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4916 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4917
4918 bool success = true;
4919 if ((status == XML_STATUS_OK) != expectedSuccess) {
4920 success = false;
4921 }
4922 if ((status == XML_STATUS_ERROR)
4923 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4924 success = false;
4925 }
4926
4927 if (! success) {
4928 fprintf(
4929 stderr,
4930 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4931 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
4932 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4933 failCount++;
4934 }
4935
4936 XML_ParserFree(parser);
4937 }
4938 }
4939
4940 if (failCount > 0) {
4941 fail("UTF-8 regression detected");
4942 }
4943 }
4944 END_TEST
4945
4946 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4947 START_TEST(test_trailing_spaces_in_elements) {
4948 const char *text = "<doc >Hi</doc >";
4949 const XML_Char *expected = XCS("doc/doc");
4950 CharData storage;
4951
4952 CharData_Init(&storage);
4953 XML_SetElementHandler(g_parser, record_element_start_handler,
4954 record_element_end_handler);
4955 XML_SetUserData(g_parser, &storage);
4956 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4957 == XML_STATUS_ERROR)
4958 xml_failure(g_parser);
4959 CharData_CheckXMLChars(&storage, expected);
4960 }
4961 END_TEST
4962
START_TEST(test_utf16_attribute)4963 START_TEST(test_utf16_attribute) {
4964 const char text[] =
4965 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4966 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4967 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4968 */
4969 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4970 const XML_Char *expected = XCS("a");
4971 CharData storage;
4972
4973 CharData_Init(&storage);
4974 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4975 XML_SetUserData(g_parser, &storage);
4976 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4977 == XML_STATUS_ERROR)
4978 xml_failure(g_parser);
4979 CharData_CheckXMLChars(&storage, expected);
4980 }
4981 END_TEST
4982
START_TEST(test_utf16_second_attr)4983 START_TEST(test_utf16_second_attr) {
4984 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4985 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4986 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4987 */
4988 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4989 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4990 const XML_Char *expected = XCS("1");
4991 CharData storage;
4992
4993 CharData_Init(&storage);
4994 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4995 XML_SetUserData(g_parser, &storage);
4996 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4997 == XML_STATUS_ERROR)
4998 xml_failure(g_parser);
4999 CharData_CheckXMLChars(&storage, expected);
5000 }
5001 END_TEST
5002
START_TEST(test_attr_after_solidus)5003 START_TEST(test_attr_after_solidus) {
5004 const char *text = "<doc attr1='a' / attr2='b'>";
5005
5006 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
5007 }
5008 END_TEST
5009
START_TEST(test_utf16_pe)5010 START_TEST(test_utf16_pe) {
5011 /* <!DOCTYPE doc [
5012 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
5013 * %{KHO KHWAI}{CHO CHAN};
5014 * ]>
5015 * <doc></doc>
5016 *
5017 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5018 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5019 */
5020 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
5021 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
5022 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
5023 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
5024 "\0%\x0e\x04\x0e\x08\0;\0\n"
5025 "\0]\0>\0\n"
5026 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
5027 #ifdef XML_UNICODE
5028 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
5029 #else
5030 const XML_Char *expected
5031 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
5032 #endif
5033 CharData storage;
5034
5035 CharData_Init(&storage);
5036 XML_SetUserData(g_parser, &storage);
5037 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
5038 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5039 == XML_STATUS_ERROR)
5040 xml_failure(g_parser);
5041 CharData_CheckXMLChars(&storage, expected);
5042 }
5043 END_TEST
5044
5045 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)5046 START_TEST(test_bad_attr_desc_keyword) {
5047 const char *text = "<!DOCTYPE doc [\n"
5048 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
5049 "]>\n"
5050 "<doc />";
5051
5052 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5053 "Bad keyword !IMPLIED not faulted");
5054 }
5055 END_TEST
5056
/* Test that an invalid attribute description keyword consisting of
 * UTF-16 characters with non-zero top bytes is correctly faulted
 */
START_TEST(test_bad_attr_desc_keyword_utf16)5061 START_TEST(test_bad_attr_desc_keyword_utf16) {
5062 /* <!DOCTYPE d [
5063 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
5064 * ]><d/>
5065 *
5066 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5067 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5068 */
5069 const char text[]
5070 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5071 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
5072 "\0#\x0e\x04\x0e\x08\0>\0\n"
5073 "\0]\0>\0<\0d\0/\0>";
5074
5075 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5076 != XML_STATUS_ERROR)
5077 fail("Invalid UTF16 attribute keyword not faulted");
5078 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5079 xml_failure(g_parser);
5080 }
5081 END_TEST
5082
5083 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
5084 * using prefix-encoding (see above) to trigger specific code paths
5085 */
START_TEST(test_bad_doctype)5086 START_TEST(test_bad_doctype) {
5087 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
5088 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
5089
5090 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5091 expect_failure(text, XML_ERROR_SYNTAX,
5092 "Invalid bytes in DOCTYPE not faulted");
5093 }
5094 END_TEST
5095
START_TEST(test_bad_doctype_utf8)5096 START_TEST(test_bad_doctype_utf8) {
5097 const char *text = "<!DOCTYPE \xDB\x25"
5098 "doc><doc/>"; // [1101 1011] [<0>010 0101]
5099 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5100 "Invalid UTF-8 in DOCTYPE not faulted");
5101 }
5102 END_TEST
5103
START_TEST(test_bad_doctype_utf16)5104 START_TEST(test_bad_doctype_utf16) {
5105 const char text[] =
5106 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
5107 *
5108 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
5109 * (name character) but not a valid letter (name start character)
5110 */
5111 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
5112 "\x06\xf2"
5113 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
5114
5115 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5116 != XML_STATUS_ERROR)
5117 fail("Invalid bytes in DOCTYPE not faulted");
5118 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5119 xml_failure(g_parser);
5120 }
5121 END_TEST
5122
START_TEST(test_bad_doctype_plus)5123 START_TEST(test_bad_doctype_plus) {
5124 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
5125 "<1+>&foo;</1+>";
5126
5127 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5128 "'+' in document name not faulted");
5129 }
5130 END_TEST
5131
START_TEST(test_bad_doctype_star)5132 START_TEST(test_bad_doctype_star) {
5133 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
5134 "<1*>&foo;</1*>";
5135
5136 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5137 "'*' in document name not faulted");
5138 }
5139 END_TEST
5140
START_TEST(test_bad_doctype_query)5141 START_TEST(test_bad_doctype_query) {
5142 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5143 "<1?>&foo;</1?>";
5144
5145 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5146 "'?' in document name not faulted");
5147 }
5148 END_TEST
5149
START_TEST(test_unknown_encoding_bad_ignore)5150 START_TEST(test_unknown_encoding_bad_ignore) {
5151 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5152 "<!DOCTYPE doc SYSTEM 'foo'>"
5153 "<doc><e>&entity;</e></doc>";
5154 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5155 "Invalid character not faulted", XCS("prefix-conv"),
5156 XML_ERROR_INVALID_TOKEN};
5157
5158 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5159 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5160 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5161 XML_SetUserData(g_parser, &fault);
5162 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5163 "Bad IGNORE section with unknown encoding not failed");
5164 }
5165 END_TEST
5166
START_TEST(test_entity_in_utf16_be_attr)5167 START_TEST(test_entity_in_utf16_be_attr) {
5168 const char text[] =
5169 /* <e a='ä ä'></e> */
5170 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5171 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5172 #ifdef XML_UNICODE
5173 const XML_Char *expected = XCS("\x00e4 \x00e4");
5174 #else
5175 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5176 #endif
5177 CharData storage;
5178
5179 CharData_Init(&storage);
5180 XML_SetUserData(g_parser, &storage);
5181 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5182 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5183 == XML_STATUS_ERROR)
5184 xml_failure(g_parser);
5185 CharData_CheckXMLChars(&storage, expected);
5186 }
5187 END_TEST
5188
START_TEST(test_entity_in_utf16_le_attr)5189 START_TEST(test_entity_in_utf16_le_attr) {
5190 const char text[] =
5191 /* <e a='ä ä'></e> */
5192 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5193 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5194 #ifdef XML_UNICODE
5195 const XML_Char *expected = XCS("\x00e4 \x00e4");
5196 #else
5197 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5198 #endif
5199 CharData storage;
5200
5201 CharData_Init(&storage);
5202 XML_SetUserData(g_parser, &storage);
5203 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5204 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5205 == XML_STATUS_ERROR)
5206 xml_failure(g_parser);
5207 CharData_CheckXMLChars(&storage, expected);
5208 }
5209 END_TEST
5210
START_TEST(test_entity_public_utf16_be)5211 START_TEST(test_entity_public_utf16_be) {
5212 const char text[] =
5213 /* <!DOCTYPE d [ */
5214 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5215 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5216 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5217 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5218 /* %e; */
5219 "\0%\0e\0;\0\n"
5220 /* ]> */
5221 "\0]\0>\0\n"
5222 /* <d>&j;</d> */
5223 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5224 ExtTest2 test_data
5225 = {/* <!ENTITY j 'baz'> */
5226 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5227 const XML_Char *expected = XCS("baz");
5228 CharData storage;
5229
5230 CharData_Init(&storage);
5231 test_data.storage = &storage;
5232 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5233 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5234 XML_SetUserData(g_parser, &test_data);
5235 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5236 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5237 == XML_STATUS_ERROR)
5238 xml_failure(g_parser);
5239 CharData_CheckXMLChars(&storage, expected);
5240 }
5241 END_TEST
5242
START_TEST(test_entity_public_utf16_le)5243 START_TEST(test_entity_public_utf16_le) {
5244 const char text[] =
5245 /* <!DOCTYPE d [ */
5246 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5247 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5248 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5249 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5250 /* %e; */
5251 "%\0e\0;\0\n\0"
5252 /* ]> */
5253 "]\0>\0\n\0"
5254 /* <d>&j;</d> */
5255 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5256 ExtTest2 test_data
5257 = {/* <!ENTITY j 'baz'> */
5258 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5259 const XML_Char *expected = XCS("baz");
5260 CharData storage;
5261
5262 CharData_Init(&storage);
5263 test_data.storage = &storage;
5264 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5265 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5266 XML_SetUserData(g_parser, &test_data);
5267 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5268 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5269 == XML_STATUS_ERROR)
5270 xml_failure(g_parser);
5271 CharData_CheckXMLChars(&storage, expected);
5272 }
5273 END_TEST
5274
5275 /* Test that a doctype with neither an internal nor external subset is
5276 * faulted
5277 */
START_TEST(test_short_doctype)5278 START_TEST(test_short_doctype) {
5279 const char *text = "<!DOCTYPE doc></doc>";
5280 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5281 "DOCTYPE without subset not rejected");
5282 }
5283 END_TEST
5284
START_TEST(test_short_doctype_2)5285 START_TEST(test_short_doctype_2) {
5286 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5287 expect_failure(text, XML_ERROR_SYNTAX,
5288 "DOCTYPE without Public ID not rejected");
5289 }
5290 END_TEST
5291
START_TEST(test_short_doctype_3)5292 START_TEST(test_short_doctype_3) {
5293 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5294 expect_failure(text, XML_ERROR_SYNTAX,
5295 "DOCTYPE without System ID not rejected");
5296 }
5297 END_TEST
5298
START_TEST(test_long_doctype)5299 START_TEST(test_long_doctype) {
5300 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5301 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5302 }
5303 END_TEST
5304
START_TEST(test_bad_entity)5305 START_TEST(test_bad_entity) {
5306 const char *text = "<!DOCTYPE doc [\n"
5307 " <!ENTITY foo PUBLIC>\n"
5308 "]>\n"
5309 "<doc/>";
5310 expect_failure(text, XML_ERROR_SYNTAX,
5311 "ENTITY without Public ID is not rejected");
5312 }
5313 END_TEST
5314
5315 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5316 START_TEST(test_bad_entity_2) {
5317 const char *text = "<!DOCTYPE doc [\n"
5318 " <!ENTITY % foo bar>\n"
5319 "]>\n"
5320 "<doc/>";
5321 expect_failure(text, XML_ERROR_SYNTAX,
5322 "ENTITY without Public ID is not rejected");
5323 }
5324 END_TEST
5325
START_TEST(test_bad_entity_3)5326 START_TEST(test_bad_entity_3) {
5327 const char *text = "<!DOCTYPE doc [\n"
5328 " <!ENTITY % foo PUBLIC>\n"
5329 "]>\n"
5330 "<doc/>";
5331 expect_failure(text, XML_ERROR_SYNTAX,
5332 "Parameter ENTITY without Public ID is not rejected");
5333 }
5334 END_TEST
5335
START_TEST(test_bad_entity_4)5336 START_TEST(test_bad_entity_4) {
5337 const char *text = "<!DOCTYPE doc [\n"
5338 " <!ENTITY % foo SYSTEM>\n"
5339 "]>\n"
5340 "<doc/>";
5341 expect_failure(text, XML_ERROR_SYNTAX,
5342 "Parameter ENTITY without Public ID is not rejected");
5343 }
5344 END_TEST
5345
START_TEST(test_bad_notation)5346 START_TEST(test_bad_notation) {
5347 const char *text = "<!DOCTYPE doc [\n"
5348 " <!NOTATION n SYSTEM>\n"
5349 "]>\n"
5350 "<doc/>";
5351 expect_failure(text, XML_ERROR_SYNTAX,
5352 "Notation without System ID is not rejected");
5353 }
5354 END_TEST
5355
5356 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5357 START_TEST(test_default_doctype_handler) {
5358 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5359 " <!ENTITY foo 'bar'>\n"
5360 "]>\n"
5361 "<doc>&foo;</doc>";
5362 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5363 {XCS("'test.dtd'"), 10, XML_FALSE},
5364 {NULL, 0, XML_FALSE}};
5365 int i;
5366
5367 XML_SetUserData(g_parser, &test_data);
5368 XML_SetDefaultHandler(g_parser, checking_default_handler);
5369 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5370 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5371 == XML_STATUS_ERROR)
5372 xml_failure(g_parser);
5373 for (i = 0; test_data[i].expected != NULL; i++)
5374 if (! test_data[i].seen)
5375 fail("Default handler not run for public !DOCTYPE");
5376 }
5377 END_TEST
5378
START_TEST(test_empty_element_abort)5379 START_TEST(test_empty_element_abort) {
5380 const char *text = "<abort/>";
5381
5382 XML_SetStartElementHandler(g_parser, start_element_suspender);
5383 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5384 != XML_STATUS_ERROR)
5385 fail("Expected to error on abort");
5386 }
5387 END_TEST
5388
5389 /* Regression test for GH issue #612: unfinished m_declAttributeType
5390 * allocation in ->m_tempPool can corrupt following allocation.
5391 */
START_TEST(test_pool_integrity_with_unfinished_attr)5392 START_TEST(test_pool_integrity_with_unfinished_attr) {
5393 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5394 "<!DOCTYPE foo [\n"
5395 "<!ELEMENT foo ANY>\n"
5396 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5397 "%entp;\n"
5398 "]>\n"
5399 "<a></a>\n";
5400 const XML_Char *expected = XCS("COMMENT");
5401 CharData storage;
5402
5403 CharData_Init(&storage);
5404 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5405 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5406 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5407 XML_SetCommentHandler(g_parser, accumulate_comment);
5408 XML_SetUserData(g_parser, &storage);
5409 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5410 == XML_STATUS_ERROR)
5411 xml_failure(g_parser);
5412 CharData_CheckXMLChars(&storage, expected);
5413 }
5414 END_TEST
5415
5416 /* Test a possible early return location in internalEntityProcessor */
START_TEST(test_entity_ref_no_elements) {
  /* The document ends right after the entity reference: <foo> is never
   * closed and no newline follows.
   */
  const char *const text = "<!DOCTYPE foo [\n"
                           "<!ENTITY e1 \"test\">\n"
                           "]> <foo>&e1;"; // intentionally missing newline

  /* Uses a private parser instead of the shared g_parser */
  XML_Parser parser = XML_ParserCreate(NULL);
  /* The final parse must fail ... */
  assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
              == XML_STATUS_ERROR);
  /* ... and must do so with exactly this error code */
  assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
  XML_ParserFree(parser);
}
END_TEST
5429
5430 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5431 START_TEST(test_deep_nested_entity) {
5432 const size_t N_LINES = 60000;
5433 const size_t SIZE_PER_LINE = 50;
5434
5435 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5436 if (text == NULL) {
5437 fail("malloc failed");
5438 }
5439
5440 char *textPtr = text;
5441
5442 // Create the XML
5443 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5444 "<!DOCTYPE foo [\n"
5445 " <!ENTITY s0 'deepText'>\n");
5446
5447 for (size_t i = 1; i < N_LINES; ++i) {
5448 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5449 (long unsigned)i, (long unsigned)(i - 1));
5450 }
5451
5452 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5453 (long unsigned)(N_LINES - 1));
5454
5455 const XML_Char *const expected = XCS("deepText");
5456
5457 CharData storage;
5458 CharData_Init(&storage);
5459
5460 XML_Parser parser = XML_ParserCreate(NULL);
5461
5462 XML_SetCharacterDataHandler(parser, accumulate_characters);
5463 XML_SetUserData(parser, &storage);
5464
5465 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5466 == XML_STATUS_ERROR)
5467 xml_failure(parser);
5468
5469 CharData_CheckXMLChars(&storage, expected);
5470 XML_ParserFree(parser);
5471 free(text);
5472 }
5473 END_TEST
5474
5475 /* Tests if chained entity references in attributes
5476 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5477 START_TEST(test_deep_nested_attribute_entity) {
5478 const size_t N_LINES = 60000;
5479 const size_t SIZE_PER_LINE = 100;
5480
5481 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5482 if (text == NULL) {
5483 fail("malloc failed");
5484 }
5485
5486 char *textPtr = text;
5487
5488 // Create the XML
5489 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5490 "<!DOCTYPE foo [\n"
5491 " <!ENTITY s0 'deepText'>\n");
5492
5493 for (size_t i = 1; i < N_LINES; ++i) {
5494 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5495 (long unsigned)i, (long unsigned)(i - 1));
5496 }
5497
5498 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5499 (long unsigned)(N_LINES - 1));
5500
5501 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5502 ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
5503 info[0].attributes = doc_info;
5504
5505 XML_Parser parser = XML_ParserCreate(NULL);
5506 ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5507
5508 XML_SetStartElementHandler(parser, counting_start_element_handler);
5509 XML_SetUserData(parser, &parserPlusElemenInfo);
5510
5511 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5512 == XML_STATUS_ERROR)
5513 xml_failure(parser);
5514
5515 XML_ParserFree(parser);
5516 free(text);
5517 }
5518 END_TEST
5519
START_TEST(test_deep_nested_entity_delayed_interpretation)5520 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5521 const size_t N_LINES = 70000;
5522 const size_t SIZE_PER_LINE = 100;
5523
5524 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5525 if (text == NULL) {
5526 fail("malloc failed");
5527 }
5528
5529 char *textPtr = text;
5530
5531 // Create the XML
5532 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5533 "<!DOCTYPE foo [\n"
5534 " <!ENTITY %% s0 'deepText'>\n");
5535
5536 for (size_t i = 1; i < N_LINES; ++i) {
5537 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5538 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i,
5539 (long unsigned)(i - 1));
5540 }
5541
5542 snprintf(textPtr, SIZE_PER_LINE,
5543 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n"
5544 " %%define_g;\n"
5545 "]>\n"
5546 "<foo/>\n",
5547 (long unsigned)(N_LINES - 1));
5548
5549 XML_Parser parser = XML_ParserCreate(NULL);
5550
5551 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5552 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5553 == XML_STATUS_ERROR)
5554 xml_failure(parser);
5555
5556 XML_ParserFree(parser);
5557 free(text);
5558 }
5559 END_TEST
5560
START_TEST(test_nested_entity_suspend)5561 START_TEST(test_nested_entity_suspend) {
5562 const char *const text = "<!DOCTYPE a [\n"
5563 " <!ENTITY e1 '<!--e1-->'>\n"
5564 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5565 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5566 "]>\n"
5567 "<a><!--start-->&e3;<!--end--></a>";
5568 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5569 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5570 CharData storage;
5571 CharData_Init(&storage);
5572 XML_Parser parser = XML_ParserCreate(NULL);
5573 ParserPlusStorage parserPlusStorage = {parser, &storage};
5574
5575 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5576 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5577 XML_SetUserData(parser, &parserPlusStorage);
5578
5579 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5580 while (status == XML_STATUS_SUSPENDED) {
5581 status = XML_ResumeParser(parser);
5582 }
5583 if (status != XML_STATUS_OK)
5584 xml_failure(parser);
5585
5586 CharData_CheckXMLChars(&storage, expected);
5587 XML_ParserFree(parser);
5588 }
5589 END_TEST
5590
START_TEST(test_nested_entity_suspend_2)5591 START_TEST(test_nested_entity_suspend_2) {
5592 const char *const text = "<!DOCTYPE doc [\n"
5593 " <!ENTITY ge1 'head1Ztail1'>\n"
5594 " <!ENTITY ge2 'head2&ge1;tail2'>\n"
5595 " <!ENTITY ge3 'head3&ge2;tail3'>\n"
5596 "]>\n"
5597 "<doc>&ge3;</doc>";
5598 const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5599 XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5600 CharData storage;
5601 CharData_Init(&storage);
5602 XML_Parser parser = XML_ParserCreate(NULL);
5603 ParserPlusStorage parserPlusStorage = {parser, &storage};
5604
5605 XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5606 XML_SetUserData(parser, &parserPlusStorage);
5607
5608 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5609 while (status == XML_STATUS_SUSPENDED) {
5610 status = XML_ResumeParser(parser);
5611 }
5612 if (status != XML_STATUS_OK)
5613 xml_failure(parser);
5614
5615 CharData_CheckXMLChars(&storage, expected);
5616 XML_ParserFree(parser);
5617 }
5618 END_TEST
5619
5620 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5621 START_TEST(test_big_tokens_scale_linearly) {
5622 const struct {
5623 const char *pre;
5624 const char *post;
5625 } text[] = {
5626 {"<a>", "</a>"}, // assumed good, used as baseline
5627 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5628 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
5629 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
5630 {"<e><", "/></e>"}, // big elem name, used to be O(N²)
5631 };
5632 const int num_cases = sizeof(text) / sizeof(text[0]);
5633 char aaaaaa[4096];
5634 const int fillsize = (int)sizeof(aaaaaa);
5635 const int fillcount = 100;
5636 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5637 const unsigned max_factor = 4;
5638 const unsigned max_scanned = max_factor * approx_bytes;
5639
5640 memset(aaaaaa, 'a', fillsize);
5641
5642 if (! g_reparseDeferralEnabledDefault) {
5643 return; // heuristic is disabled; we would get O(n^2) and fail.
5644 }
5645
5646 for (int i = 0; i < num_cases; ++i) {
5647 XML_Parser parser = XML_ParserCreate(NULL);
5648 assert_true(parser != NULL);
5649 enum XML_Status status;
5650 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5651
5652 // parse the start text
5653 g_bytesScanned = 0;
5654 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5655 (int)strlen(text[i].pre), XML_FALSE);
5656 if (status != XML_STATUS_OK) {
5657 xml_failure(parser);
5658 }
5659
5660 // parse lots of 'a', failing the test early if it takes too long
5661 unsigned past_max_count = 0;
5662 for (int f = 0; f < fillcount; ++f) {
5663 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5664 if (status != XML_STATUS_OK) {
5665 xml_failure(parser);
5666 }
5667 if (g_bytesScanned > max_scanned) {
5668 // We're not done, and have already passed the limit -- the test will
5669 // definitely fail. This block allows us to save time by failing early.
5670 const unsigned pushed
5671 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5672 fprintf(
5673 stderr,
5674 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5675 f + 1, fillcount, pushed, g_bytesScanned,
5676 g_bytesScanned / (double)pushed, max_scanned, max_factor);
5677 past_max_count++;
5678 // We are failing, but allow a few log prints first. If we don't reach
5679 // a count of five, the test will fail after the loop instead.
5680 assert_true(past_max_count < 5);
5681 }
5682 }
5683
5684 // parse the end text
5685 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5686 (int)strlen(text[i].post), XML_TRUE);
5687 if (status != XML_STATUS_OK) {
5688 xml_failure(parser);
5689 }
5690
5691 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5692 if (g_bytesScanned > max_scanned) {
5693 fprintf(
5694 stderr,
5695 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5696 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5697 max_factor);
5698 fail("scanned too many bytes");
5699 }
5700
5701 XML_ParserFree(parser);
5702 }
5703 }
5704 END_TEST
5705
START_TEST(test_set_reparse_deferral)5706 START_TEST(test_set_reparse_deferral) {
5707 const char *const pre = "<d>";
5708 const char *const start = "<x attr='";
5709 const char *const end = "'></x>";
5710 char eeeeee[100];
5711 const int fillsize = (int)sizeof(eeeeee);
5712 memset(eeeeee, 'e', fillsize);
5713
5714 for (int enabled = 0; enabled <= 1; enabled += 1) {
5715 set_subtest("deferral=%d", enabled);
5716
5717 XML_Parser parser = XML_ParserCreate(NULL);
5718 assert_true(parser != NULL);
5719 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5720 // pre-grow the buffer to avoid reparsing due to almost-fullness
5721 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5722
5723 CharData storage;
5724 CharData_Init(&storage);
5725 XML_SetUserData(parser, &storage);
5726 XML_SetStartElementHandler(parser, start_element_event_handler);
5727
5728 enum XML_Status status;
5729 // parse the start text
5730 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5731 if (status != XML_STATUS_OK) {
5732 xml_failure(parser);
5733 }
5734 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5735
5736 // ..and the start of the token
5737 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5738 if (status != XML_STATUS_OK) {
5739 xml_failure(parser);
5740 }
5741 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5742
5743 // try to parse lots of 'e', but the token isn't finished
5744 for (int c = 0; c < 100; ++c) {
5745 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5746 if (status != XML_STATUS_OK) {
5747 xml_failure(parser);
5748 }
5749 }
5750 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5751
5752 // end the <x> token.
5753 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5754 if (status != XML_STATUS_OK) {
5755 xml_failure(parser);
5756 }
5757
5758 if (enabled) {
5759 // In general, we may need to push more data to trigger a reparse attempt,
5760 // but in this test, the data is constructed to always require it.
5761 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5762 // 2x the token length should suffice; the +1 covers the start and end.
5763 for (int c = 0; c < 101; ++c) {
5764 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5765 if (status != XML_STATUS_OK) {
5766 xml_failure(parser);
5767 }
5768 }
5769 }
5770 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5771
5772 XML_ParserFree(parser);
5773 }
5774 }
5775 END_TEST
5776
5777 struct element_decl_data {
5778 XML_Parser parser;
5779 int count;
5780 };
5781
5782 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5783 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5784 UNUSED_P(name);
5785 struct element_decl_data *testdata = (struct element_decl_data *)userData;
5786 testdata->count += 1;
5787 XML_FreeContentModel(testdata->parser, model);
5788 }
5789
5790 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5791 external_inherited_parser(XML_Parser p, const XML_Char *context,
5792 const XML_Char *base, const XML_Char *systemId,
5793 const XML_Char *publicId) {
5794 UNUSED_P(base);
5795 UNUSED_P(systemId);
5796 UNUSED_P(publicId);
5797 const char *const pre = "<!ELEMENT document ANY>\n";
5798 const char *const start = "<!ELEMENT ";
5799 const char *const end = " ANY>\n";
5800 const char *const post = "<!ELEMENT xyz ANY>\n";
5801 const int enabled = *(int *)XML_GetUserData(p);
5802 char eeeeee[100];
5803 char spaces[100];
5804 const int fillsize = (int)sizeof(eeeeee);
5805 assert_true(fillsize == (int)sizeof(spaces));
5806 memset(eeeeee, 'e', fillsize);
5807 memset(spaces, ' ', fillsize);
5808
5809 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5810 assert_true(parser != NULL);
5811 // pre-grow the buffer to avoid reparsing due to almost-fullness
5812 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5813
5814 struct element_decl_data testdata;
5815 testdata.parser = parser;
5816 testdata.count = 0;
5817 XML_SetUserData(parser, &testdata);
5818 XML_SetElementDeclHandler(parser, element_decl_counter);
5819
5820 enum XML_Status status;
5821 // parse the initial text
5822 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5823 if (status != XML_STATUS_OK) {
5824 xml_failure(parser);
5825 }
5826 assert_true(testdata.count == 1); // first element should be done
5827
5828 // ..and the start of the big token
5829 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5830 if (status != XML_STATUS_OK) {
5831 xml_failure(parser);
5832 }
5833 assert_true(testdata.count == 1); // still just the first one
5834
5835 // try to parse lots of 'e', but the token isn't finished
5836 for (int c = 0; c < 100; ++c) {
5837 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5838 if (status != XML_STATUS_OK) {
5839 xml_failure(parser);
5840 }
5841 }
5842 assert_true(testdata.count == 1); // *still* just the first one
5843
5844 // end the big token.
5845 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5846 if (status != XML_STATUS_OK) {
5847 xml_failure(parser);
5848 }
5849
5850 if (enabled) {
5851 // In general, we may need to push more data to trigger a reparse attempt,
5852 // but in this test, the data is constructed to always require it.
5853 assert_true(testdata.count == 1); // or the test is incorrect
5854 // 2x the token length should suffice; the +1 covers the start and end.
5855 for (int c = 0; c < 101; ++c) {
5856 status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5857 if (status != XML_STATUS_OK) {
5858 xml_failure(parser);
5859 }
5860 }
5861 }
5862 assert_true(testdata.count == 2); // the big token should be done
5863
5864 // parse the final text
5865 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5866 if (status != XML_STATUS_OK) {
5867 xml_failure(parser);
5868 }
5869 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5870
5871 XML_ParserFree(parser);
5872 return XML_STATUS_OK;
5873 }
5874
START_TEST(test_reparse_deferral_is_inherited)5875 START_TEST(test_reparse_deferral_is_inherited) {
5876 const char *const text
5877 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5878 for (int enabled = 0; enabled <= 1; ++enabled) {
5879 set_subtest("deferral=%d", enabled);
5880
5881 XML_Parser parser = XML_ParserCreate(NULL);
5882 assert_true(parser != NULL);
5883 XML_SetUserData(parser, (void *)&enabled);
5884 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5885 // this handler creates a sub-parser and checks that its deferral behavior
5886 // is what we expected, based on the value of `enabled` (in userdata).
5887 XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5888 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5889 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5890 xml_failure(parser);
5891
5892 XML_ParserFree(parser);
5893 }
5894 }
5895 END_TEST
5896
START_TEST(test_set_reparse_deferral_on_null_parser)5897 START_TEST(test_set_reparse_deferral_on_null_parser) {
5898 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5899 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5900 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5901 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5902 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5903 == XML_FALSE);
5904 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5905 == XML_FALSE);
5906 }
5907 END_TEST
5908
START_TEST(test_set_reparse_deferral_on_the_fly)5909 START_TEST(test_set_reparse_deferral_on_the_fly) {
5910 const char *const pre = "<d><x attr='";
5911 const char *const end = "'></x>";
5912 char iiiiii[100];
5913 const int fillsize = (int)sizeof(iiiiii);
5914 memset(iiiiii, 'i', fillsize);
5915
5916 XML_Parser parser = XML_ParserCreate(NULL);
5917 assert_true(parser != NULL);
5918 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5919
5920 CharData storage;
5921 CharData_Init(&storage);
5922 XML_SetUserData(parser, &storage);
5923 XML_SetStartElementHandler(parser, start_element_event_handler);
5924
5925 enum XML_Status status;
5926 // parse the start text
5927 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5928 if (status != XML_STATUS_OK) {
5929 xml_failure(parser);
5930 }
5931 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5932
5933 // try to parse some 'i', but the token isn't finished
5934 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5935 if (status != XML_STATUS_OK) {
5936 xml_failure(parser);
5937 }
5938 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5939
5940 // end the <x> token.
5941 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5942 if (status != XML_STATUS_OK) {
5943 xml_failure(parser);
5944 }
5945 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5946
5947 // now change the heuristic setting and add *no* data
5948 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5949 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5950 status = XML_Parse(parser, "", 0, XML_FALSE);
5951 if (status != XML_STATUS_OK) {
5952 xml_failure(parser);
5953 }
5954 CharData_CheckXMLChars(&storage, XCS("dx"));
5955
5956 XML_ParserFree(parser);
5957 }
5958 END_TEST
5959
START_TEST(test_set_bad_reparse_option)5960 START_TEST(test_set_bad_reparse_option) {
5961 XML_Parser parser = XML_ParserCreate(NULL);
5962 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5963 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5964 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5965 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5966 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5967 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5968 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5969 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5970 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5971 XML_ParserFree(parser);
5972 }
5973 END_TEST
5974
/* Bookkeeping for the counting allocator below. */
static size_t g_totalAlloc = 0;   // sum of every size requested so far
static size_t g_biggestAlloc = 0; // largest single size requested so far

/* realloc() wrapper that records the requested size in g_totalAlloc and
 * g_biggestAlloc before forwarding the request.  The counters are updated
 * whether or not the underlying realloc() succeeds.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() counterpart: a counted realloc() of a NULL pointer. */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
5991
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5992 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5993 if (g_chunkSize != 0) {
5994 // this test does not use SINGLE_BYTES, because it depends on very precise
5995 // buffer fills.
5996 return;
5997 }
5998 if (! g_reparseDeferralEnabledDefault) {
5999 return; // this test is irrelevant when the deferral heuristic is disabled.
6000 }
6001
6002 const int document_length = 65536;
6003 char *const document = (char *)malloc(document_length);
6004
6005 const XML_Memory_Handling_Suite memfuncs = {
6006 counting_malloc,
6007 counting_realloc,
6008 free,
6009 };
6010
6011 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
6012 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
6013 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
6014
6015 for (const int *leading = leading_list; *leading >= 0; leading++) {
6016 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
6017 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
6018 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
6019 *fillsize);
6020 // start by checking that the test looks reasonably valid
6021 assert_true(*leading + *bigtoken <= document_length);
6022
6023 // put 'x' everywhere; some will be overwritten by elements.
6024 memset(document, 'x', document_length);
6025 // maybe add an initial tag
6026 if (*leading) {
6027 assert_true(*leading >= 3); // or the test case is invalid
6028 memcpy(document, "<a>", 3);
6029 }
6030 // add the large token
6031 document[*leading + 0] = '<';
6032 document[*leading + 1] = 'b';
6033 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
6034 document[*leading + *bigtoken - 1] = '>';
6035
6036 // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
6037 const int expected_elem_total = 1 + (*leading ? 1 : 0);
6038
6039 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
6040 assert_true(parser != NULL);
6041
6042 CharData storage;
6043 CharData_Init(&storage);
6044 XML_SetUserData(parser, &storage);
6045 XML_SetStartElementHandler(parser, start_element_event_handler);
6046
6047 g_biggestAlloc = 0;
6048 g_totalAlloc = 0;
6049 int offset = 0;
6050 // fill data until the big token is covered (but not necessarily parsed)
6051 while (offset < *leading + *bigtoken) {
6052 assert_true(offset + *fillsize <= document_length);
6053 const enum XML_Status status
6054 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6055 if (status != XML_STATUS_OK) {
6056 xml_failure(parser);
6057 }
6058 offset += *fillsize;
6059 }
6060 // Now, check that we've had a buffer allocation that could fit the
6061 // context bytes and our big token. In order to detect a special case,
6062 // we need to know how many bytes of our big token were included in the
6063 // first push that contained _any_ bytes of the big token:
6064 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
6065 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
6066 // Special case: we aren't saving any context, and the whole big token
6067 // was covered by a single fill, so Expat may have parsed directly
6068 // from our input pointer, without allocating an internal buffer.
6069 } else if (*leading < XML_CONTEXT_BYTES) {
6070 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
6071 } else {
6072 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
6073 }
6074 // fill data until the big token is actually parsed
6075 while (storage.count < expected_elem_total) {
6076 const size_t alloc_before = g_totalAlloc;
6077 assert_true(offset + *fillsize <= document_length);
6078 const enum XML_Status status
6079 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6080 if (status != XML_STATUS_OK) {
6081 xml_failure(parser);
6082 }
6083 offset += *fillsize;
6084 // since all the bytes of the big token are already in the buffer,
6085 // the bufsize ceiling should make us finish its parsing without any
6086 // further buffer allocations. We assume that there will be no other
6087 // large allocations in this test.
6088 assert_true(g_totalAlloc - alloc_before < 4096);
6089 }
6090 // test-the-test: was our alloc even called?
6091 assert_true(g_totalAlloc > 0);
6092 // test-the-test: there shouldn't be any extra start elements
6093 assert_true(storage.count == expected_elem_total);
6094
6095 XML_ParserFree(parser);
6096 }
6097 }
6098 }
6099 free(document);
6100 }
6101 END_TEST
6102
START_TEST(test_varying_buffer_fills)6103 START_TEST(test_varying_buffer_fills) {
6104 const int KiB = 1024;
6105 const int MiB = 1024 * KiB;
6106 const int document_length = 16 * MiB;
6107 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
6108
6109 if (g_chunkSize != 0) {
6110 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
6111 }
6112
6113 char *const document = (char *)malloc(document_length);
6114 assert_true(document != NULL);
6115 memset(document, 'x', document_length);
6116 document[0] = '<';
6117 document[1] = 't';
6118 memset(&document[2], ' ', big - 2); // a very spacy token
6119 document[big - 1] = '>';
6120
6121 // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
6122 // When reparse deferral is enabled, the final (negated) value is the expected
6123 // maximum number of bytes scanned in parse attempts.
6124 const int testcases[][30] = {
6125 {8 * MiB, -8 * MiB},
6126 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
6127 // zero-size fills shouldn't trigger the bypass
6128 {4 * MiB, 0, 4 * MiB, -12 * MiB},
6129 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
6130 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
6131 // try to hit the buffer ceiling only once (at the end)
6132 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
6133 // try to hit the same buffer ceiling multiple times
6134 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
6135
6136 // try to hit every ceiling, by always landing 1K shy of the buffer size
6137 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
6138 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
6139
6140 // try to avoid every ceiling, by always landing 1B past the buffer size
6141 // the normal 2x heuristic threshold still forces parse attempts.
6142 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6143 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6144 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6145 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6146 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6147 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6148 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6149 -(10 * MiB + 682 * KiB + 7)},
6150 // try to avoid every ceiling again, except on our last fill.
6151 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6152 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6153 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6154 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6155 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6156 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6157 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6158 -(10 * MiB + 682 * KiB + 6)},
6159
6160 // try to hit ceilings on the way multiple times
6161 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6162 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6163 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
6164 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
6165 // we'll make a parse attempt at every parse call
6166 -(45 * MiB + 12)},
6167 };
6168 const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6169 for (int test_i = 0; test_i < testcount; test_i++) {
6170 const int *fillsize = testcases[test_i];
6171 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6172 fillsize[2], fillsize[3]);
6173 XML_Parser parser = XML_ParserCreate(NULL);
6174 assert_true(parser != NULL);
6175
6176 CharData storage;
6177 CharData_Init(&storage);
6178 XML_SetUserData(parser, &storage);
6179 XML_SetStartElementHandler(parser, start_element_event_handler);
6180
6181 g_bytesScanned = 0;
6182 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6183 int offset = 0;
6184 while (*fillsize >= 0) {
6185 assert_true(offset + *fillsize <= document_length); // or test is invalid
6186 const enum XML_Status status
6187 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6188 if (status != XML_STATUS_OK) {
6189 xml_failure(parser);
6190 }
6191 offset += *fillsize;
6192 fillsize++;
6193 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6194 worstcase_bytes += offset; // we might've tried to parse all pending bytes
6195 }
6196 assert_true(storage.count == 1); // the big token should've been parsed
6197 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6198 if (g_reparseDeferralEnabledDefault) {
6199 // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6200 const unsigned max_bytes_scanned = -*fillsize;
6201 if (g_bytesScanned > max_bytes_scanned) {
6202 fprintf(stderr,
6203 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6204 g_bytesScanned, max_bytes_scanned);
6205 fail("too many bytes scanned in parse attempts");
6206 }
6207 }
6208 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6209
6210 XML_ParserFree(parser);
6211 }
6212 free(document);
6213 }
6214 END_TEST
6215
6216 void
make_basic_test_case(Suite * s)6217 make_basic_test_case(Suite *s) {
6218 TCase *tc_basic = tcase_create("basic tests");
6219
6220 suite_add_tcase(s, tc_basic);
6221 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
6222
6223 tcase_add_test(tc_basic, test_nul_byte);
6224 tcase_add_test(tc_basic, test_u0000_char);
6225 tcase_add_test(tc_basic, test_siphash_self);
6226 tcase_add_test(tc_basic, test_siphash_spec);
6227 tcase_add_test(tc_basic, test_bom_utf8);
6228 tcase_add_test(tc_basic, test_bom_utf16_be);
6229 tcase_add_test(tc_basic, test_bom_utf16_le);
6230 tcase_add_test(tc_basic, test_nobom_utf16_le);
6231 tcase_add_test(tc_basic, test_hash_collision);
6232 tcase_add_test(tc_basic, test_illegal_utf8);
6233 tcase_add_test(tc_basic, test_utf8_auto_align);
6234 tcase_add_test(tc_basic, test_utf16);
6235 tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
6236 tcase_add_test(tc_basic, test_not_utf16);
6237 tcase_add_test(tc_basic, test_bad_encoding);
6238 tcase_add_test(tc_basic, test_latin1_umlauts);
6239 tcase_add_test(tc_basic, test_long_utf8_character);
6240 tcase_add_test(tc_basic, test_long_latin1_attribute);
6241 tcase_add_test(tc_basic, test_long_ascii_attribute);
6242 /* Regression test for SF bug #491986. */
6243 tcase_add_test(tc_basic, test_danish_latin1);
6244 /* Regression test for SF bug #514281. */
6245 tcase_add_test(tc_basic, test_french_charref_hexidecimal);
6246 tcase_add_test(tc_basic, test_french_charref_decimal);
6247 tcase_add_test(tc_basic, test_french_latin1);
6248 tcase_add_test(tc_basic, test_french_utf8);
6249 tcase_add_test(tc_basic, test_utf8_false_rejection);
6250 tcase_add_test(tc_basic, test_line_number_after_parse);
6251 tcase_add_test(tc_basic, test_column_number_after_parse);
6252 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
6253 tcase_add_test(tc_basic, test_line_number_after_error);
6254 tcase_add_test(tc_basic, test_column_number_after_error);
6255 tcase_add_test(tc_basic, test_really_long_lines);
6256 tcase_add_test(tc_basic, test_really_long_encoded_lines);
6257 tcase_add_test(tc_basic, test_end_element_events);
6258 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
6259 tcase_add_test(tc_basic, test_attr_whitespace_normalization);
6260 tcase_add_test(tc_basic, test_xmldecl_misplaced);
6261 tcase_add_test(tc_basic, test_xmldecl_invalid);
6262 tcase_add_test(tc_basic, test_xmldecl_missing_attr);
6263 tcase_add_test(tc_basic, test_xmldecl_missing_value);
6264 tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
6265 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
6266 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
6267 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
6268 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
6269 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
6270 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
6271 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
6272 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
6273 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
6274 tcase_add_test(tc_basic,
6275 test_wfc_undeclared_entity_with_external_subset_standalone);
6276 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
6277 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
6278 tcase_add_test(tc_basic, test_not_standalone_handler_reject);
6279 tcase_add_test(tc_basic, test_not_standalone_handler_accept);
6280 tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
6281 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
6282 tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
6283 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
6284 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
6285 tcase_add_test(tc_basic, test_dtd_attr_handling);
6286 tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
6287 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
6288 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
6289 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
6290 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
6291 tcase_add_test(tc_basic, test_good_cdata_ascii);
6292 tcase_add_test(tc_basic, test_good_cdata_utf16);
6293 tcase_add_test(tc_basic, test_good_cdata_utf16_le);
6294 tcase_add_test(tc_basic, test_long_cdata_utf16);
6295 tcase_add_test(tc_basic, test_multichar_cdata_utf16);
6296 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
6297 tcase_add_test(tc_basic, test_bad_cdata);
6298 tcase_add_test(tc_basic, test_bad_cdata_utf16);
6299 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
6300 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
6301 tcase_add_test(tc_basic, test_memory_allocation);
6302 tcase_add_test__if_xml_ge(tc_basic, test_default_current);
6303 tcase_add_test(tc_basic, test_dtd_elements);
6304 tcase_add_test(tc_basic, test_dtd_elements_nesting);
6305 tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
6306 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
6307 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
6308 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
6309 tcase_add_test__ifdef_xml_dtd(tc_basic,
6310 test_foreign_dtd_without_external_subset);
6311 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6312 tcase_add_test(tc_basic, test_set_base);
6313 tcase_add_test(tc_basic, test_attributes);
6314 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6315 tcase_add_test(tc_basic, test_resume_invalid_parse);
6316 tcase_add_test(tc_basic, test_resume_resuspended);
6317 tcase_add_test(tc_basic, test_cdata_default);
6318 tcase_add_test(tc_basic, test_subordinate_reset);
6319 tcase_add_test(tc_basic, test_subordinate_suspend);
6320 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6321 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6322 tcase_add_test__ifdef_xml_dtd(tc_basic,
6323 test_ext_entity_invalid_suspended_parse);
6324 tcase_add_test(tc_basic, test_explicit_encoding);
6325 tcase_add_test(tc_basic, test_trailing_cr);
6326 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6327 tcase_add_test(tc_basic, test_trailing_rsqb);
6328 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6329 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6330 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6331 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6332 tcase_add_test(tc_basic, test_empty_parse);
6333 tcase_add_test(tc_basic, test_negative_len_parse);
6334 tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6335 tcase_add_test(tc_basic, test_get_buffer_1);
6336 tcase_add_test(tc_basic, test_get_buffer_2);
6337 #if XML_CONTEXT_BYTES > 0
6338 tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6339 #endif
6340 tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6341 tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6342 tcase_add_test(tc_basic, test_byte_info_at_end);
6343 tcase_add_test(tc_basic, test_byte_info_at_error);
6344 tcase_add_test(tc_basic, test_byte_info_at_cdata);
6345 tcase_add_test(tc_basic, test_predefined_entities);
6346 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6347 tcase_add_test(tc_basic, test_not_predefined_entities);
6348 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6349 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6350 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6351 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6352 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6353 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6354 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6355 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6356 tcase_add_test(tc_basic, test_bad_public_doctype);
6357 tcase_add_test(tc_basic, test_attribute_enum_value);
6358 tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6359 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6360 tcase_add_test(tc_basic, test_public_notation_no_sysid);
6361 tcase_add_test(tc_basic, test_nested_groups);
6362 tcase_add_test(tc_basic, test_group_choice);
6363 tcase_add_test(tc_basic, test_standalone_parameter_entity);
6364 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6365 tcase_add_test__ifdef_xml_dtd(tc_basic,
6366 test_recursive_external_parameter_entity);
6367 tcase_add_test__ifdef_xml_dtd(tc_basic,
6368 test_recursive_external_parameter_entity_2);
6369 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6370 tcase_add_test(tc_basic, test_suspend_xdecl);
6371 tcase_add_test(tc_basic, test_abort_epilog);
6372 tcase_add_test(tc_basic, test_abort_epilog_2);
6373 tcase_add_test(tc_basic, test_suspend_epilog);
6374 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6375 tcase_add_test(tc_basic, test_unfinished_epilog);
6376 tcase_add_test(tc_basic, test_partial_char_in_epilog);
6377 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6378 tcase_add_test__ifdef_xml_dtd(tc_basic,
6379 test_suspend_resume_internal_entity_issue_629);
6380 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6381 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6382 tcase_add_test(tc_basic, test_restart_on_error);
6383 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6384 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6385 tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6386 tcase_add_test(tc_basic, test_standalone_internal_entity);
6387 tcase_add_test(tc_basic, test_skipped_external_entity);
6388 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6389 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6390 tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6391 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6392 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6393 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6394 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6395 tcase_add_test(tc_basic, test_pi_handled_in_default);
6396 tcase_add_test(tc_basic, test_comment_handled_in_default);
6397 tcase_add_test(tc_basic, test_pi_yml);
6398 tcase_add_test(tc_basic, test_pi_xnl);
6399 tcase_add_test(tc_basic, test_pi_xmm);
6400 tcase_add_test(tc_basic, test_utf16_pi);
6401 tcase_add_test(tc_basic, test_utf16_be_pi);
6402 tcase_add_test(tc_basic, test_utf16_be_comment);
6403 tcase_add_test(tc_basic, test_utf16_le_comment);
6404 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6405 tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6406 tcase_add_test(tc_basic, test_unknown_encoding_success);
6407 tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6408 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6409 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6410 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6411 tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6412 tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6413 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6414 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6415 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6416 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6417 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6418 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6419 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6420 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6421 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6422 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6423 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6424 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6425 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6426 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6427 tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6428 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6429 tcase_add_test(tc_basic, test_utf8_in_start_tags);
6430 tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6431 tcase_add_test(tc_basic, test_utf16_attribute);
6432 tcase_add_test(tc_basic, test_utf16_second_attr);
6433 tcase_add_test(tc_basic, test_attr_after_solidus);
6434 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6435 tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6436 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6437 tcase_add_test(tc_basic, test_bad_doctype);
6438 tcase_add_test(tc_basic, test_bad_doctype_utf8);
6439 tcase_add_test(tc_basic, test_bad_doctype_utf16);
6440 tcase_add_test(tc_basic, test_bad_doctype_plus);
6441 tcase_add_test(tc_basic, test_bad_doctype_star);
6442 tcase_add_test(tc_basic, test_bad_doctype_query);
6443 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6444 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6445 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6446 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6447 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6448 tcase_add_test(tc_basic, test_short_doctype);
6449 tcase_add_test(tc_basic, test_short_doctype_2);
6450 tcase_add_test(tc_basic, test_short_doctype_3);
6451 tcase_add_test(tc_basic, test_long_doctype);
6452 tcase_add_test(tc_basic, test_bad_entity);
6453 tcase_add_test(tc_basic, test_bad_entity_2);
6454 tcase_add_test(tc_basic, test_bad_entity_3);
6455 tcase_add_test(tc_basic, test_bad_entity_4);
6456 tcase_add_test(tc_basic, test_bad_notation);
6457 tcase_add_test(tc_basic, test_default_doctype_handler);
6458 tcase_add_test(tc_basic, test_empty_element_abort);
6459 tcase_add_test__ifdef_xml_dtd(tc_basic,
6460 test_pool_integrity_with_unfinished_attr);
6461 tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
6462 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
6463 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
6464 tcase_add_test__if_xml_ge(tc_basic,
6465 test_deep_nested_entity_delayed_interpretation);
6466 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6467 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
6468 tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6469 tcase_add_test(tc_basic, test_set_reparse_deferral);
6470 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6471 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6472 tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6473 tcase_add_test(tc_basic, test_set_bad_reparse_option);
6474 tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6475 tcase_add_test(tc_basic, test_varying_buffer_fills);
6476 }
6477