1 /* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Licensed under the MIT license:
23
24 Permission is hereby granted, free of charge, to any person obtaining
25 a copy of this software and associated documentation files (the
26 "Software"), to deal in the Software without restriction, including
27 without limitation the rights to use, copy, modify, merge, publish,
28 distribute, sublicense, and/or sell copies of the Software, and to permit
29 persons to whom the Software is furnished to do so, subject to the
30 following conditions:
31
32 The above copyright notice and this permission notice shall be included
33 in all copies or substantial portions of the Software.
34
35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41 USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43
44 #if defined(NDEBUG)
45 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47
48 #include <assert.h>
49
50 #include <stdio.h>
51 #include <string.h>
52 #include <time.h>
53
54 #if ! defined(__cplusplus)
55 # include <stdbool.h>
56 #endif
57
58 #include "expat_config.h"
59
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69
70 static void
basic_setup(void)71 basic_setup(void) {
72 g_parser = XML_ParserCreate(NULL);
73 if (g_parser == NULL)
74 fail("Parser not created.");
75 }
76
77 /*
78 * Character & encoding tests.
79 */
80
START_TEST(test_nul_byte)81 START_TEST(test_nul_byte) {
82 char text[] = "<doc>\0</doc>";
83
84 /* test that a NUL byte (in US-ASCII data) is an error */
85 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86 == XML_STATUS_OK)
87 fail("Parser did not report error on NUL-byte.");
88 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89 xml_failure(g_parser);
90 }
91 END_TEST
92
START_TEST(test_u0000_char)93 START_TEST(test_u0000_char) {
94 /* test that a NUL byte (in US-ASCII data) is an error */
95 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF,
96 "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99
START_TEST(test_siphash_self)100 START_TEST(test_siphash_self) {
101 if (! sip24_valid())
102 fail("SipHash self-test failed");
103 }
104 END_TEST
105
START_TEST(test_siphash_spec)106 START_TEST(test_siphash_spec) {
107 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109 "\x0a\x0b\x0c\x0d\x0e";
110 const size_t len = sizeof(message) - 1;
111 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112 struct siphash state;
113 struct sipkey key;
114
115 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116 "\x0a\x0b\x0c\x0d\x0e\x0f");
117 sip24_init(&state, &key);
118
119 /* Cover spread across calls */
120 sip24_update(&state, message, 4);
121 sip24_update(&state, message + 4, len - 4);
122
123 /* Cover null length */
124 sip24_update(&state, message, 0);
125
126 if (sip24_final(&state) != expected)
127 fail("sip24_final failed spec test\n");
128
129 /* Cover wrapper */
130 if (siphash24(message, len, &key) != expected)
131 fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134
START_TEST(test_bom_utf8)135 START_TEST(test_bom_utf8) {
136 /* This test is really just making sure we don't core on a UTF-8 BOM. */
137 const char *text = "\357\273\277<e/>";
138
139 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140 == XML_STATUS_ERROR)
141 xml_failure(g_parser);
142 }
143 END_TEST
144
START_TEST(test_bom_utf16_be)145 START_TEST(test_bom_utf16_be) {
146 char text[] = "\376\377\0<\0e\0/\0>";
147
148 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149 == XML_STATUS_ERROR)
150 xml_failure(g_parser);
151 }
152 END_TEST
153
START_TEST(test_bom_utf16_le)154 START_TEST(test_bom_utf16_le) {
155 char text[] = "\377\376<\0e\0/\0>\0";
156
157 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158 == XML_STATUS_ERROR)
159 xml_failure(g_parser);
160 }
161 END_TEST
162
START_TEST(test_nobom_utf16_le)163 START_TEST(test_nobom_utf16_le) {
164 char text[] = " \0<\0e\0/\0>\0";
165
166 if (g_chunkSize == 1) {
167 // TODO: with just the first byte, we can't tell the difference between
168 // UTF-16-LE and UTF-8. Avoid the failure for now.
169 return;
170 }
171
172 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173 == XML_STATUS_ERROR)
174 xml_failure(g_parser);
175 }
176 END_TEST
177
START_TEST(test_hash_collision)178 START_TEST(test_hash_collision) {
179 /* For full coverage of the lookup routine, we need to ensure a
180 * hash collision even though we can only tell that we have one
181 * through breakpoint debugging or coverage statistics. The
182 * following will cause a hash collision on machines with a 64-bit
183 * long type; others will have to experiment. The full coverage
184 * tests invoked from qa.sh usually provide a hash collision, but
185 * not always. This is an attempt to provide insurance.
186 */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188 const char *text
189 = "<doc>\n"
190 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195 "<d8>This triggers the table growth and collides with b2</d8>\n"
196 "</doc>\n";
197
198 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200 == XML_STATUS_ERROR)
201 xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205
206 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)207 START_TEST(test_danish_latin1) {
208 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210 #ifdef XML_UNICODE
211 const XML_Char *expected
212 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213 #else
214 const XML_Char *expected
215 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216 #endif
217 run_character_check(text, expected);
218 }
219 END_TEST
220
221 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)222 START_TEST(test_french_charref_hexidecimal) {
223 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224 "<doc>éèàçêÈ</doc>";
225 #ifdef XML_UNICODE
226 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227 #else
228 const XML_Char *expected
229 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230 #endif
231 run_character_check(text, expected);
232 }
233 END_TEST
234
START_TEST(test_french_charref_decimal)235 START_TEST(test_french_charref_decimal) {
236 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237 "<doc>éèàçêÈ</doc>";
238 #ifdef XML_UNICODE
239 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240 #else
241 const XML_Char *expected
242 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243 #endif
244 run_character_check(text, expected);
245 }
246 END_TEST
247
START_TEST(test_french_latin1)248 START_TEST(test_french_latin1) {
249 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251 #ifdef XML_UNICODE
252 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254 const XML_Char *expected
255 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257 run_character_check(text, expected);
258 }
259 END_TEST
260
START_TEST(test_french_utf8)261 START_TEST(test_french_utf8) {
262 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263 "<doc>\xC3\xA9</doc>";
264 #ifdef XML_UNICODE
265 const XML_Char *expected = XCS("\x00e9");
266 #else
267 const XML_Char *expected = XCS("\xC3\xA9");
268 #endif
269 run_character_check(text, expected);
270 }
271 END_TEST
272
273 /* Regression test for SF bug #600479.
274 XXX There should be a test that exercises all legal XML Unicode
275 characters as PCDATA and attribute value content, and XML Name
276 characters as part of element and attribute names.
277 */
START_TEST(test_utf8_false_rejection)278 START_TEST(test_utf8_false_rejection) {
279 const char *text = "<doc>\xEF\xBA\xBF</doc>";
280 #ifdef XML_UNICODE
281 const XML_Char *expected = XCS("\xfebf");
282 #else
283 const XML_Char *expected = XCS("\xEF\xBA\xBF");
284 #endif
285 run_character_check(text, expected);
286 }
287 END_TEST
288
/* Regression test for SF bug #477667.
   This test ensures that any 8-bit character followed by a 7-bit
   character will not be mistakenly interpreted as a valid UTF-8
   sequence.
*/
START_TEST(test_illegal_utf8)294 START_TEST(test_illegal_utf8) {
295 char text[100];
296 int i;
297
298 for (i = 128; i <= 255; ++i) {
299 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301 == XML_STATUS_OK) {
302 snprintf(text, sizeof(text),
303 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304 i);
305 fail(text);
306 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307 xml_failure(g_parser);
308 /* Reset the parser since we use the same parser repeatedly. */
309 XML_ParserReset(g_parser, NULL);
310 }
311 }
312 END_TEST
313
314 /* Examples, not masks: */
315 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320
START_TEST(test_utf8_auto_align)321 START_TEST(test_utf8_auto_align) {
322 struct TestCase {
323 ptrdiff_t expectedMovementInChars;
324 const char *input;
325 };
326
327 struct TestCase cases[] = {
328 {00, ""},
329
330 {00, UTF8_LEAD_1},
331
332 {-1, UTF8_LEAD_2},
333 {00, UTF8_LEAD_2 UTF8_FOLLOW},
334
335 {-1, UTF8_LEAD_3},
336 {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338
339 {-1, UTF8_LEAD_4},
340 {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343 };
344
345 size_t i = 0;
346 bool success = true;
347 for (; i < sizeof(cases) / sizeof(*cases); i++) {
348 const char *fromLim = cases[i].input + strlen(cases[i].input);
349 const char *const fromLimInitially = fromLim;
350 ptrdiff_t actualMovementInChars;
351
352 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353
354 actualMovementInChars = (fromLim - fromLimInitially);
355 if (actualMovementInChars != cases[i].expectedMovementInChars) {
356 size_t j = 0;
357 success = false;
358 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359 ", actually moved by %2d chars: \"",
360 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361 (int)actualMovementInChars);
362 for (; j < strlen(cases[i].input); j++) {
363 printf("\\x%02x", (unsigned char)cases[i].input[j]);
364 }
365 printf("\"\n");
366 }
367 }
368
369 if (! success) {
370 fail("UTF-8 auto-alignment is not bullet-proof\n");
371 }
372 }
373 END_TEST
374
START_TEST(test_utf16)375 START_TEST(test_utf16) {
376 /* <?xml version="1.0" encoding="UTF-16"?>
377 * <doc a='123'>some {A} text</doc>
378 *
379 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380 */
381 char text[]
382 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385 "\000'\000?\000>\000\n"
386 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388 "<\000/\000d\000o\000c\000>";
389 #ifdef XML_UNICODE
390 const XML_Char *expected = XCS("some \xff21 text");
391 #else
392 const XML_Char *expected = XCS("some \357\274\241 text");
393 #endif
394 CharData storage;
395
396 CharData_Init(&storage);
397 XML_SetUserData(g_parser, &storage);
398 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400 == XML_STATUS_ERROR)
401 xml_failure(g_parser);
402 CharData_CheckXMLChars(&storage, expected);
403 }
404 END_TEST
405
START_TEST(test_utf16_le_epilog_newline)406 START_TEST(test_utf16_le_epilog_newline) {
407 unsigned int first_chunk_bytes = 17;
408 char text[] = "\xFF\xFE" /* BOM */
409 "<\000e\000/\000>\000" /* document element */
410 "\r\000\n\000\r\000\n\000"; /* epilog */
411
412 if (first_chunk_bytes >= sizeof(text) - 1)
413 fail("bad value of first_chunk_bytes");
414 if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415 == XML_STATUS_ERROR)
416 xml_failure(g_parser);
417 else {
418 enum XML_Status rc;
419 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420 sizeof(text) - first_chunk_bytes - 1,
421 XML_TRUE);
422 if (rc == XML_STATUS_ERROR)
423 xml_failure(g_parser);
424 }
425 }
426 END_TEST
427
428 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)429 START_TEST(test_not_utf16) {
430 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431 "<doc>Hi</doc>";
432
433 /* Use a handler to provoke the appropriate code paths */
434 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435 expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436 "UTF-16 declared in UTF-8 not faulted");
437 }
438 END_TEST
439
440 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)441 START_TEST(test_bad_encoding) {
442 const char *text = "<doc>Hi</doc>";
443
444 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445 fail("XML_SetEncoding failed");
446 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447 "Unknown encoding not faulted");
448 }
449 END_TEST
450
451 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)452 START_TEST(test_latin1_umlauts) {
453 const char *text
454 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n"
456 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>";
457 #ifdef XML_UNICODE
458 /* Expected results in UTF-16 */
459 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461 #else
462 /* Expected results in UTF-8 */
463 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465 #endif
466
467 run_character_check(text, expected);
468 XML_ParserReset(g_parser, NULL);
469 run_attribute_check(text, expected);
470 /* Repeat with a default handler */
471 XML_ParserReset(g_parser, NULL);
472 XML_SetDefaultHandler(g_parser, dummy_default_handler);
473 run_character_check(text, expected);
474 XML_ParserReset(g_parser, NULL);
475 XML_SetDefaultHandler(g_parser, dummy_default_handler);
476 run_attribute_check(text, expected);
477 }
478 END_TEST
479
480 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)481 START_TEST(test_long_utf8_character) {
482 const char *text
483 = "<?xml version='1.0' encoding='utf-8'?>\n"
484 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485 "<do\xf0\x90\x80\x80/>";
486 expect_failure(text, XML_ERROR_INVALID_TOKEN,
487 "4-byte UTF-8 character in element name not faulted");
488 }
489 END_TEST
490
491 /* Test that a long latin-1 attribute (too long to convert in one go)
492 * is correctly converted
493 */
START_TEST(test_long_latin1_attribute)494 START_TEST(test_long_latin1_attribute) {
495 const char *text
496 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497 "<doc att='"
498 /* 64 characters per line */
499 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515 /* Last character splits across a buffer boundary */
516 "\xe4'>\n</doc>";
517
518 const XML_Char *expected =
519 /* 64 characters per line */
520 /* clang-format off */
521 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537 /* clang-format on */
538 #ifdef XML_UNICODE
539 XCS("\x00e4");
540 #else
541 XCS("\xc3\xa4");
542 #endif
543
544 run_attribute_check(text, expected);
545 }
546 END_TEST
547
548 /* Test that a long ASCII attribute (too long to convert in one go)
549 * is correctly converted
550 */
START_TEST(test_long_ascii_attribute)551 START_TEST(test_long_ascii_attribute) {
552 const char *text
553 = "<?xml version='1.0' encoding='us-ascii'?>\n"
554 "<doc att='"
555 /* 64 characters per line */
556 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572 "01234'>\n</doc>";
573 const XML_Char *expected =
574 /* 64 characters per line */
575 /* clang-format off */
576 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592 XCS("01234");
593 /* clang-format on */
594
595 run_attribute_check(text, expected);
596 }
597 END_TEST
598
599 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)600 START_TEST(test_line_number_after_parse) {
601 const char *text = "<tag>\n"
602 "\n"
603 "\n</tag>";
604 XML_Size lineno;
605
606 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607 == XML_STATUS_ERROR)
608 xml_failure(g_parser);
609 lineno = XML_GetCurrentLineNumber(g_parser);
610 if (lineno != 4) {
611 char buffer[100];
612 snprintf(buffer, sizeof(buffer),
613 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614 fail(buffer);
615 }
616 }
617 END_TEST
618
619 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)620 START_TEST(test_column_number_after_parse) {
621 const char *text = "<tag></tag>";
622 XML_Size colno;
623
624 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625 == XML_STATUS_ERROR)
626 xml_failure(g_parser);
627 colno = XML_GetCurrentColumnNumber(g_parser);
628 if (colno != 11) {
629 char buffer[100];
630 snprintf(buffer, sizeof(buffer),
631 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632 fail(buffer);
633 }
634 }
635 END_TEST
636
637 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)638 START_TEST(test_line_and_column_numbers_inside_handlers) {
639 const char *text = "<a>\n" /* Unix end-of-line */
640 " <b>\r\n" /* Windows end-of-line */
641 " <c/>\r" /* Mac OS end-of-line */
642 " </b>\n"
643 " <d>\n"
644 " <f/>\n"
645 " </d>\n"
646 "</a>";
647 const StructDataEntry expected[]
648 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
651 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
653 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654 StructData storage;
655
656 StructData_Init(&storage);
657 XML_SetUserData(g_parser, &storage);
658 XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659 XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661 == XML_STATUS_ERROR)
662 xml_failure(g_parser);
663
664 StructData_CheckItems(&storage, expected, expected_count);
665 StructData_Dispose(&storage);
666 }
667 END_TEST
668
669 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)670 START_TEST(test_line_number_after_error) {
671 const char *text = "<a>\n"
672 " <b>\n"
673 " </a>"; /* missing </b> */
674 XML_Size lineno;
675 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676 != XML_STATUS_ERROR)
677 fail("Expected a parse error");
678
679 lineno = XML_GetCurrentLineNumber(g_parser);
680 if (lineno != 3) {
681 char buffer[100];
682 snprintf(buffer, sizeof(buffer),
683 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684 fail(buffer);
685 }
686 }
687 END_TEST
688
689 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)690 START_TEST(test_column_number_after_error) {
691 const char *text = "<a>\n"
692 " <b>\n"
693 " </a>"; /* missing </b> */
694 XML_Size colno;
695 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696 != XML_STATUS_ERROR)
697 fail("Expected a parse error");
698
699 colno = XML_GetCurrentColumnNumber(g_parser);
700 if (colno != 4) {
701 char buffer[100];
702 snprintf(buffer, sizeof(buffer),
703 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704 fail(buffer);
705 }
706 }
707 END_TEST
708
709 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)710 START_TEST(test_really_long_lines) {
711 /* This parses an input line longer than INIT_DATA_BUF_SIZE
712 characters long (defined to be 1024 in xmlparse.c). We take a
713 really cheesy approach to building the input buffer, because
714 this avoids writing bugs in buffer-filling code.
715 */
716 const char *text
717 = "<e>"
718 /* 64 chars */
719 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720 /* until we have at least 1024 characters on the line: */
721 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737 "</e>";
738 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739 == XML_STATUS_ERROR)
740 xml_failure(g_parser);
741 }
742 END_TEST
743
744 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)745 START_TEST(test_really_long_encoded_lines) {
746 /* As above, except that we want to provoke an output buffer
747 * overflow with a non-trivial encoding. For this we need to pass
748 * the whole cdata in one go, not byte-by-byte.
749 */
750 void *buffer;
751 const char *text
752 = "<?xml version='1.0' encoding='iso-8859-1'?>"
753 "<e>"
754 /* 64 chars */
755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756 /* until we have at least 1024 characters on the line: */
757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773 "</e>";
774 int parse_len = (int)strlen(text);
775
776 /* Need a cdata handler to provoke the code path we want to test */
777 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778 buffer = XML_GetBuffer(g_parser, parse_len);
779 if (buffer == NULL)
780 fail("Could not allocate parse buffer");
781 assert(buffer != NULL);
782 memcpy(buffer, text, parse_len);
783 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784 xml_failure(g_parser);
785 }
786 END_TEST
787
788 /*
789 * Element event tests.
790 */
791
START_TEST(test_end_element_events)792 START_TEST(test_end_element_events) {
793 const char *text = "<a><b><c/></b><d><f/></d></a>";
794 const XML_Char *expected = XCS("/c/b/f/d/a");
795 CharData storage;
796
797 CharData_Init(&storage);
798 XML_SetUserData(g_parser, &storage);
799 XML_SetEndElementHandler(g_parser, end_element_event_handler);
800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801 == XML_STATUS_ERROR)
802 xml_failure(g_parser);
803 CharData_CheckXMLChars(&storage, expected);
804 }
805 END_TEST
806
807 /*
808 * Attribute tests.
809 */
810
811 /* Helper used by the following tests; this checks any "attr" and "refs"
812 attributes to make sure whitespace has been normalized.
813
814 Return true if whitespace has been normalized in a string, using
815 the rules for attribute value normalization. The 'is_cdata' flag
816 is needed since CDATA attributes don't need to have multiple
817 whitespace characters collapsed to a single space, while other
818 attribute data types do. (Section 3.3.3 of the recommendation.)
819 */
820 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)821 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822 int blanks = 0;
823 int at_start = 1;
824 while (*s) {
825 if (*s == XCS(' '))
826 ++blanks;
827 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828 return 0;
829 else {
830 if (at_start) {
831 at_start = 0;
832 if (blanks && ! is_cdata)
833 /* illegal leading blanks */
834 return 0;
835 } else if (blanks > 1 && ! is_cdata)
836 return 0;
837 blanks = 0;
838 }
839 ++s;
840 }
841 if (blanks && ! is_cdata)
842 return 0;
843 return 1;
844 }
845
846 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)847 START_TEST(test_helper_is_whitespace_normalized) {
848 assert(is_whitespace_normalized(XCS("abc"), 0));
849 assert(is_whitespace_normalized(XCS("abc"), 1));
850 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
855 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
856 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858 assert(! is_whitespace_normalized(XCS(" "), 0));
859 assert(is_whitespace_normalized(XCS(" "), 1));
860 assert(! is_whitespace_normalized(XCS("\t"), 0));
861 assert(! is_whitespace_normalized(XCS("\t"), 1));
862 assert(! is_whitespace_normalized(XCS("\n"), 0));
863 assert(! is_whitespace_normalized(XCS("\n"), 1));
864 assert(! is_whitespace_normalized(XCS("\r"), 0));
865 assert(! is_whitespace_normalized(XCS("\r"), 1));
866 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867 }
868 END_TEST
869
870 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)871 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872 const XML_Char **atts) {
873 int i;
874 UNUSED_P(userData);
875 UNUSED_P(name);
876 for (i = 0; atts[i] != NULL; i += 2) {
877 const XML_Char *attrname = atts[i];
878 const XML_Char *value = atts[i + 1];
879 if (xcstrcmp(XCS("attr"), attrname) == 0
880 || xcstrcmp(XCS("ents"), attrname) == 0
881 || xcstrcmp(XCS("refs"), attrname) == 0) {
882 if (! is_whitespace_normalized(value, 0)) {
883 char buffer[256];
884 snprintf(buffer, sizeof(buffer),
885 "attribute value not normalized: %" XML_FMT_STR
886 "='%" XML_FMT_STR "'",
887 attrname, value);
888 fail(buffer);
889 }
890 }
891 }
892 }
893
START_TEST(test_attr_whitespace_normalization)894 START_TEST(test_attr_whitespace_normalization) {
895 const char *text
896 = "<!DOCTYPE doc [\n"
897 " <!ATTLIST doc\n"
898 " attr NMTOKENS #REQUIRED\n"
899 " ents ENTITIES #REQUIRED\n"
900 " refs IDREFS #REQUIRED>\n"
901 "]>\n"
902 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
903 " ents=' ent-1 \t\r\n"
904 " ent-2 ' >\n"
905 " <e id='id-1'/>\n"
906 " <e id='id-2'/>\n"
907 "</doc>";
908
909 XML_SetStartElementHandler(g_parser,
910 check_attr_contains_normalized_whitespace);
911 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912 == XML_STATUS_ERROR)
913 xml_failure(g_parser);
914 }
915 END_TEST
916
917 /*
918 * XML declaration tests.
919 */
920
START_TEST(test_xmldecl_misplaced)921 START_TEST(test_xmldecl_misplaced) {
922 expect_failure("\n"
923 "<?xml version='1.0'?>\n"
924 "<a/>",
925 XML_ERROR_MISPLACED_XML_PI,
926 "failed to report misplaced XML declaration");
927 }
928 END_TEST
929
START_TEST(test_xmldecl_invalid)930 START_TEST(test_xmldecl_invalid) {
931 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932 "Failed to report invalid XML declaration");
933 }
934 END_TEST
935
START_TEST(test_xmldecl_missing_attr)936 START_TEST(test_xmldecl_missing_attr) {
937 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938 "Failed to report missing XML declaration attribute");
939 }
940 END_TEST
941
START_TEST(test_xmldecl_missing_value)942 START_TEST(test_xmldecl_missing_value) {
943 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944 "<doc/>",
945 XML_ERROR_XML_DECL,
946 "Failed to report missing attribute value");
947 }
948 END_TEST
949
950 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)951 START_TEST(test_unknown_encoding_internal_entity) {
952 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954 "<test a='&foo;'/>";
955
956 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958 == XML_STATUS_ERROR)
959 xml_failure(g_parser);
960 }
961 END_TEST
962
963 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)964 START_TEST(test_unrecognised_encoding_internal_entity) {
965 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967 "<test a='&foo;'/>";
968
969 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971 != XML_STATUS_ERROR)
972 fail("Unrecognised encoding not rejected");
973 }
974 END_TEST
975
976 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)977 START_TEST(test_ext_entity_set_encoding) {
978 const char *text = "<!DOCTYPE doc [\n"
979 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980 "]>\n"
981 "<doc>&en;</doc>";
982 ExtTest test_data
983 = {/* This text says it's an unsupported encoding, but it's really
984 UTF-8, which we tell Expat using XML_SetEncoding().
985 */
986 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987 #ifdef XML_UNICODE
988 const XML_Char *expected = XCS("\x00e9");
989 #else
990 const XML_Char *expected = XCS("\xc3\xa9");
991 #endif
992
993 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994 run_ext_character_check(text, &test_data, expected);
995 }
996 END_TEST
997
998 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)999 START_TEST(test_ext_entity_no_handler) {
1000 const char *text = "<!DOCTYPE doc [\n"
1001 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002 "]>\n"
1003 "<doc>&en;</doc>";
1004
1005 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006 run_character_check(text, XCS(""));
1007 }
1008 END_TEST
1009
1010 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1011 START_TEST(test_ext_entity_set_bom) {
1012 const char *text = "<!DOCTYPE doc [\n"
1013 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014 "]>\n"
1015 "<doc>&en;</doc>";
1016 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017 "<?xml encoding='iso-8859-3'?>"
1018 "\xC3\xA9",
1019 XCS("utf-8"), NULL};
1020 #ifdef XML_UNICODE
1021 const XML_Char *expected = XCS("\x00e9");
1022 #else
1023 const XML_Char *expected = XCS("\xc3\xa9");
1024 #endif
1025
1026 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027 run_ext_character_check(text, &test_data, expected);
1028 }
1029 END_TEST
1030
1031 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1032 START_TEST(test_ext_entity_bad_encoding) {
1033 const char *text = "<!DOCTYPE doc [\n"
1034 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035 "]>\n"
1036 "<doc>&en;</doc>";
1037 ExtFaults fault
1038 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040
1041 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042 XML_SetUserData(g_parser, &fault);
1043 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044 "Bad encoding should not have been accepted");
1045 }
1046 END_TEST
1047
1048 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1049 START_TEST(test_ext_entity_bad_encoding_2) {
1050 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052 "<doc>&entity;</doc>";
1053 ExtFaults fault
1054 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056
1057 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059 XML_SetUserData(g_parser, &fault);
1060 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061 "Bad encoding not faulted in external entity handler");
1062 }
1063 END_TEST
1064
1065 /* Test that no error is reported for unknown entities if we don't
1066 read an external subset. This was fixed in Expat 1.95.5.
1067 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1068 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070 "<doc>&entity;</doc>";
1071
1072 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073 == XML_STATUS_ERROR)
1074 xml_failure(g_parser);
1075 }
1076 END_TEST
1077
1078 /* Test that an error is reported for unknown entities if we don't
1079 have an external subset.
1080 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1081 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083 "Parser did not report undefined entity w/out a DTD.");
1084 }
1085 END_TEST
1086
1087 /* Test that an error is reported for unknown entities if we don't
1088 read an external subset, but have been declared standalone.
1089 */
START_TEST(test_wfc_undeclared_entity_standalone)1090 START_TEST(test_wfc_undeclared_entity_standalone) {
1091 const char *text
1092 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094 "<doc>&entity;</doc>";
1095
1096 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097 "Parser did not report undefined entity (standalone).");
1098 }
1099 END_TEST
1100
1101 /* Test that an error is reported for unknown entities if we have read
1102 an external subset, and standalone is true.
1103 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1104 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105 const char *text
1106 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108 "<doc>&entity;</doc>";
1109 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110
1111 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112 XML_SetUserData(g_parser, &test_data);
1113 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115 "Parser did not report undefined entity (external DTD).");
1116 }
1117 END_TEST
1118
1119 /* Test that external entity handling is not done if the parsing flag
1120 * is set to UNLESS_STANDALONE
1121 */
START_TEST(test_entity_with_external_subset_unless_standalone)1122 START_TEST(test_entity_with_external_subset_unless_standalone) {
1123 const char *text
1124 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126 "<doc>&entity;</doc>";
1127 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128
1129 XML_SetParamEntityParsing(g_parser,
1130 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131 XML_SetUserData(g_parser, &test_data);
1132 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134 "Parser did not report undefined entity");
1135 }
1136 END_TEST
1137
1138 /* Test that no error is reported for unknown entities if we have read
1139 an external subset, and standalone is false.
1140 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1141 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144 "<doc>&entity;</doc>";
1145 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146
1147 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149 run_ext_character_check(text, &test_data, XCS(""));
1150 }
1151 END_TEST
1152
1153 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1154 START_TEST(test_not_standalone_handler_reject) {
1155 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157 "<doc>&entity;</doc>";
1158 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159
1160 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161 XML_SetUserData(g_parser, &test_data);
1162 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165 "NotStandalone handler failed to reject");
1166
1167 /* Try again but without external entity handling */
1168 XML_ParserReset(g_parser, NULL);
1169 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171 "NotStandalone handler failed to reject");
1172 }
1173 END_TEST
1174
1175 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1176 START_TEST(test_not_standalone_handler_accept) {
1177 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179 "<doc>&entity;</doc>";
1180 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181
1182 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185 run_ext_character_check(text, &test_data, XCS(""));
1186
1187 /* Repeat without the external entity handler */
1188 XML_ParserReset(g_parser, NULL);
1189 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190 run_character_check(text, XCS(""));
1191 }
1192 END_TEST
1193
START_TEST(test_wfc_no_recursive_entity_refs)1194 START_TEST(test_wfc_no_recursive_entity_refs) {
1195 const char *text = "<!DOCTYPE doc [\n"
1196 " <!ENTITY entity '&entity;'>\n"
1197 "]>\n"
1198 "<doc>&entity;</doc>";
1199
1200 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201 "Parser did not report recursive entity reference.");
1202 }
1203 END_TEST
1204
START_TEST(test_recursive_external_parameter_entity_2)1205 START_TEST(test_recursive_external_parameter_entity_2) {
1206 struct TestCase {
1207 const char *doc;
1208 enum XML_Status expectedStatus;
1209 };
1210
1211 struct TestCase cases[] = {
1212 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213 {"<!ENTITY % p1 '%p1;'>"
1214 "<!ENTITY % p1 'first declaration wins'>",
1215 XML_STATUS_ERROR},
1216 {"<!ENTITY % p1 'first declaration wins'>"
1217 "<!ENTITY % p1 '%p1;'>",
1218 XML_STATUS_OK},
1219 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK},
1220 };
1221
1222 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223 const char *const doc = cases[i].doc;
1224 const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225 set_subtest("%s", doc);
1226
1227 XML_Parser parser = XML_ParserCreate(NULL);
1228 assert_true(parser != NULL);
1229
1230 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1231 assert_true(ext_parser != NULL);
1232
1233 const enum XML_Status actualStatus
1234 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235
1236 assert_true(actualStatus == expectedStatus);
1237 if (actualStatus != XML_STATUS_OK) {
1238 assert_true(XML_GetErrorCode(ext_parser)
1239 == XML_ERROR_RECURSIVE_ENTITY_REF);
1240 }
1241
1242 XML_ParserFree(ext_parser);
1243 XML_ParserFree(parser);
1244 }
1245 }
1246 END_TEST
1247
1248 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1249 START_TEST(test_ext_entity_invalid_parse) {
1250 const char *text = "<!DOCTYPE doc [\n"
1251 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1252 "]>\n"
1253 "<doc>&en;</doc>";
1254 const ExtFaults faults[]
1255 = {{"<", "Incomplete element declaration not faulted", NULL,
1256 XML_ERROR_UNCLOSED_TOKEN},
1257 {"<\xe2\x82", /* First two bytes of a three-byte char */
1258 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1259 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1260 XML_ERROR_PARTIAL_CHAR},
1261 {NULL, NULL, NULL, XML_ERROR_NONE}};
1262 const ExtFaults *fault = faults;
1263
1264 for (; fault->parse_text != NULL; fault++) {
1265 set_subtest("\"%s\"", fault->parse_text);
1266 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1267 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1268 XML_SetUserData(g_parser, (void *)fault);
1269 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1270 "Parser did not report external entity error");
1271 XML_ParserReset(g_parser, NULL);
1272 }
1273 }
1274 END_TEST
1275
1276 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1277 START_TEST(test_dtd_default_handling) {
1278 const char *text = "<!DOCTYPE doc [\n"
1279 "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1280 "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1281 "<!ELEMENT doc EMPTY>\n"
1282 "<!ATTLIST doc a CDATA #IMPLIED>\n"
1283 "<?pi in dtd?>\n"
1284 "<!--comment in dtd-->\n"
1285 "]><doc/>";
1286
1287 XML_SetDefaultHandler(g_parser, accumulate_characters);
1288 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1289 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1290 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1291 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1292 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1293 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1294 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1295 XML_SetCommentHandler(g_parser, dummy_comment_handler);
1296 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1297 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1298 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1299 }
1300 END_TEST
1301
1302 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1303 START_TEST(test_dtd_attr_handling) {
1304 const char *prolog = "<!DOCTYPE doc [\n"
1305 "<!ELEMENT doc EMPTY>\n";
1306 AttTest attr_data[]
1307 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1308 "]>"
1309 "<doc a='two'/>",
1310 XCS("doc"), XCS("a"),
1311 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1312 NULL, XML_TRUE},
1313 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1314 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1315 "]>"
1316 "<doc/>",
1317 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1318 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1319 "]>"
1320 "<doc/>",
1321 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1322 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1323 "]>"
1324 "<doc/>",
1325 XCS("doc"), XCS("a"), XCS("CDATA"),
1326 #ifdef XML_UNICODE
1327 XCS("\x06f2"),
1328 #else
1329 XCS("\xdb\xb2"),
1330 #endif
1331 XML_FALSE},
1332 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1333 AttTest *test;
1334
1335 for (test = attr_data; test->definition != NULL; test++) {
1336 set_subtest("%s", test->definition);
1337 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1338 XML_SetUserData(g_parser, test);
1339 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1340 XML_FALSE)
1341 == XML_STATUS_ERROR)
1342 xml_failure(g_parser);
1343 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1344 (int)strlen(test->definition), XML_TRUE)
1345 == XML_STATUS_ERROR)
1346 xml_failure(g_parser);
1347 XML_ParserReset(g_parser, NULL);
1348 }
1349 }
1350 END_TEST
1351
1352 /* See related SF bug #673791.
1353 When namespace processing is enabled, setting the namespace URI for
1354 a prefix is not allowed; this test ensures that it *is* allowed
1355 when namespace processing is not enabled.
1356 (See Namespaces in XML, section 2.)
1357 */
START_TEST(test_empty_ns_without_namespaces)1358 START_TEST(test_empty_ns_without_namespaces) {
1359 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1360 " <e xmlns:prefix=''/>\n"
1361 "</doc>";
1362
1363 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1364 == XML_STATUS_ERROR)
1365 xml_failure(g_parser);
1366 }
1367 END_TEST
1368
1369 /* Regression test for SF bug #824420.
1370 Checks that an xmlns:prefix attribute set in an attribute's default
1371 value isn't misinterpreted.
1372 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1373 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1374 const char *text = "<!DOCTYPE e:element [\n"
1375 " <!ATTLIST e:element\n"
1376 " xmlns:e CDATA 'http://example.org/'>\n"
1377 " ]>\n"
1378 "<e:element/>";
1379
1380 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1381 == XML_STATUS_ERROR)
1382 xml_failure(g_parser);
1383 }
1384 END_TEST
1385
1386 /* Regression test for SF bug #1515266: missing check of stopped
1387 parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1388 START_TEST(test_stop_parser_between_char_data_calls) {
1389 /* The sample data must be big enough that there are two calls to
1390 the character data handler from within the inner "for" loop of
1391 the XML_TOK_DATA_CHARS case in doContent(), and the character
1392 handler must stop the parser and clear the character data
1393 handler.
1394 */
1395 const char *text = long_character_data_text;
1396
1397 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1398 g_resumable = XML_FALSE;
1399 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1400 != XML_STATUS_ERROR)
1401 xml_failure(g_parser);
1402 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1403 xml_failure(g_parser);
1404 }
1405 END_TEST
1406
1407 /* Regression test for SF bug #1515266: missing check of stopped
1408 parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1409 START_TEST(test_suspend_parser_between_char_data_calls) {
1410 /* The sample data must be big enough that there are two calls to
1411 the character data handler from within the inner "for" loop of
1412 the XML_TOK_DATA_CHARS case in doContent(), and the character
1413 handler must stop the parser and clear the character data
1414 handler.
1415 */
1416 const char *text = long_character_data_text;
1417
1418 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1419 g_resumable = XML_TRUE;
1420 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1421 != XML_STATUS_SUSPENDED)
1422 xml_failure(g_parser);
1423 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1424 xml_failure(g_parser);
1425 /* Try parsing directly */
1426 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1427 != XML_STATUS_ERROR)
1428 fail("Attempt to continue parse while suspended not faulted");
1429 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1430 fail("Suspended parse not faulted with correct error");
1431 }
1432 END_TEST
1433
1434 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1435 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1436 const char *text = long_character_data_text;
1437
1438 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1439 g_resumable = XML_FALSE;
1440 g_abortable = XML_FALSE;
1441 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1442 != XML_STATUS_ERROR)
1443 fail("Failed to double-stop parser");
1444
1445 XML_ParserReset(g_parser, NULL);
1446 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1447 g_resumable = XML_TRUE;
1448 g_abortable = XML_FALSE;
1449 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1450 != XML_STATUS_SUSPENDED)
1451 fail("Failed to double-suspend parser");
1452
1453 XML_ParserReset(g_parser, NULL);
1454 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1455 g_resumable = XML_TRUE;
1456 g_abortable = XML_TRUE;
1457 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1458 != XML_STATUS_ERROR)
1459 fail("Failed to suspend-abort parser");
1460 }
1461 END_TEST
1462
START_TEST(test_good_cdata_ascii)1463 START_TEST(test_good_cdata_ascii) {
1464 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1465 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1466
1467 CharData storage;
1468 CharData_Init(&storage);
1469 XML_SetUserData(g_parser, &storage);
1470 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1471 /* Add start and end handlers for coverage */
1472 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1473 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1474
1475 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1476 == XML_STATUS_ERROR)
1477 xml_failure(g_parser);
1478 CharData_CheckXMLChars(&storage, expected);
1479
1480 /* Try again, this time with a default handler */
1481 XML_ParserReset(g_parser, NULL);
1482 CharData_Init(&storage);
1483 XML_SetUserData(g_parser, &storage);
1484 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1485 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1486
1487 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1488 == XML_STATUS_ERROR)
1489 xml_failure(g_parser);
1490 CharData_CheckXMLChars(&storage, expected);
1491 }
1492 END_TEST
1493
START_TEST(test_good_cdata_utf16)1494 START_TEST(test_good_cdata_utf16) {
1495 /* Test data is:
1496 * <?xml version='1.0' encoding='utf-16'?>
1497 * <a><![CDATA[hello]]></a>
1498 */
1499 const char text[]
1500 = "\0<\0?\0x\0m\0l\0"
1501 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1502 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1503 "1\0"
1504 "6\0'"
1505 "\0?\0>\0\n"
1506 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1507 const XML_Char *expected = XCS("hello");
1508
1509 CharData storage;
1510 CharData_Init(&storage);
1511 XML_SetUserData(g_parser, &storage);
1512 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1513
1514 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1515 == XML_STATUS_ERROR)
1516 xml_failure(g_parser);
1517 CharData_CheckXMLChars(&storage, expected);
1518 }
1519 END_TEST
1520
START_TEST(test_good_cdata_utf16_le)1521 START_TEST(test_good_cdata_utf16_le) {
1522 /* Test data is:
1523 * <?xml version='1.0' encoding='utf-16'?>
1524 * <a><![CDATA[hello]]></a>
1525 */
1526 const char text[]
1527 = "<\0?\0x\0m\0l\0"
1528 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1529 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1530 "1\0"
1531 "6\0'"
1532 "\0?\0>\0\n"
1533 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1534 const XML_Char *expected = XCS("hello");
1535
1536 CharData storage;
1537 CharData_Init(&storage);
1538 XML_SetUserData(g_parser, &storage);
1539 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1540
1541 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1542 == XML_STATUS_ERROR)
1543 xml_failure(g_parser);
1544 CharData_CheckXMLChars(&storage, expected);
1545 }
1546 END_TEST
1547
1548 /* Test UTF16 conversion of a long cdata string */
1549
1550 /* 16 characters: handy macro to reduce visual clutter */
1551 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1552
START_TEST(test_long_cdata_utf16)1553 START_TEST(test_long_cdata_utf16) {
1554 /* Test data is:
1555 * <?xlm version='1.0' encoding='utf-16'?>
1556 * <a><![CDATA[
1557 * ABCDEFGHIJKLMNOP
1558 * ]]></a>
1559 */
1560 const char text[]
1561 = "\0<\0?\0x\0m\0l\0 "
1562 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1563 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1564 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1565 /* 64 characters per line */
1566 /* clang-format off */
1567 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1568 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1569 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1570 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1571 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1572 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1573 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1574 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1575 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1576 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1577 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1578 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1579 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1580 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1581 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1582 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1583 A_TO_P_IN_UTF16
1584 /* clang-format on */
1585 "\0]\0]\0>\0<\0/\0a\0>";
1586 const XML_Char *expected =
1587 /* clang-format off */
1588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604 XCS("ABCDEFGHIJKLMNOP");
1605 /* clang-format on */
1606 CharData storage;
1607 void *buffer;
1608
1609 CharData_Init(&storage);
1610 XML_SetUserData(g_parser, &storage);
1611 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1613 if (buffer == NULL)
1614 fail("Could not allocate parse buffer");
1615 assert(buffer != NULL);
1616 memcpy(buffer, text, sizeof(text) - 1);
1617 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1618 xml_failure(g_parser);
1619 CharData_CheckXMLChars(&storage, expected);
1620 }
1621 END_TEST
1622
1623 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1624 START_TEST(test_multichar_cdata_utf16) {
1625 /* Test data is:
1626 * <?xml version='1.0' encoding='utf-16'?>
1627 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1628 *
1629 * where {MINIM} is U+1d15e (a minim or half-note)
1630 * UTF-16: 0xd834 0xdd5e
1631 * UTF-8: 0xf0 0x9d 0x85 0x9e
1632 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1633 * UTF-16: 0xd834 0xdd5f
1634 * UTF-8: 0xf0 0x9d 0x85 0x9f
1635 */
1636 const char text[] = "\0<\0?\0x\0m\0l\0"
1637 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1638 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1639 "1\0"
1640 "6\0'"
1641 "\0?\0>\0\n"
1642 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1643 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1644 "\0]\0]\0>\0<\0/\0a\0>";
1645 #ifdef XML_UNICODE
1646 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1647 #else
1648 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1649 #endif
1650 CharData storage;
1651
1652 CharData_Init(&storage);
1653 XML_SetUserData(g_parser, &storage);
1654 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1655
1656 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1657 == XML_STATUS_ERROR)
1658 xml_failure(g_parser);
1659 CharData_CheckXMLChars(&storage, expected);
1660 }
1661 END_TEST
1662
/* Test that a reversed UTF-16 surrogate pair in a CDATA section is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1664 START_TEST(test_utf16_bad_surrogate_pair) {
1665 /* Test data is:
1666 * <?xml version='1.0' encoding='utf-16'?>
1667 * <a><![CDATA[{BADLINB}]]></a>
1668 *
1669 * where {BADLINB} is U+10000 (the first Linear B character)
1670 * with the UTF-16 surrogate pair in the wrong order, i.e.
1671 * 0xdc00 0xd800
1672 */
1673 const char text[] = "\0<\0?\0x\0m\0l\0"
1674 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1675 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1676 "1\0"
1677 "6\0'"
1678 "\0?\0>\0\n"
1679 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1680 "\xdc\x00\xd8\x00"
1681 "\0]\0]\0>\0<\0/\0a\0>";
1682
1683 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1684 != XML_STATUS_ERROR)
1685 fail("Reversed UTF-16 surrogate pair not faulted");
1686 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1687 xml_failure(g_parser);
1688 }
1689 END_TEST
1690
START_TEST(test_bad_cdata)1691 START_TEST(test_bad_cdata) {
1692 struct CaseData {
1693 const char *text;
1694 enum XML_Error expectedError;
1695 };
1696
1697 struct CaseData cases[]
1698 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1699 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1700 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1701 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1702 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1703 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1704 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1705 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1706
1707 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1708 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710
1711 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1712 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1713 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1715 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1716 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1717 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1718
1719 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1720 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1722
1723 size_t i = 0;
1724 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1725 set_subtest("%s", cases[i].text);
1726 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1727 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1728 const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1729
1730 assert(actualStatus == XML_STATUS_ERROR);
1731
1732 if (actualError != cases[i].expectedError) {
1733 char message[100];
1734 snprintf(message, sizeof(message),
1735 "Expected error %d but got error %d for case %u: \"%s\"\n",
1736 cases[i].expectedError, actualError, (unsigned int)i + 1,
1737 cases[i].text);
1738 fail(message);
1739 }
1740
1741 XML_ParserReset(g_parser, NULL);
1742 }
1743 }
1744 END_TEST
1745
1746 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1747 START_TEST(test_bad_cdata_utf16) {
1748 struct CaseData {
1749 size_t text_bytes;
1750 const char *text;
1751 enum XML_Error expected_error;
1752 };
1753
1754 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1755 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1756 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1757 "1\0"
1758 "6\0'"
1759 "\0?\0>\0\n"
1760 "\0<\0a\0>";
1761 struct CaseData cases[] = {
1762 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1763 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1764 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1765 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1766 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1767 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1768 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1769 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1770 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1771 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1772 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1773 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1774 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1775 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1776 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1777 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1778 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1779 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1780 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782 /* Now add a four-byte UTF-16 character */
1783 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1784 XML_ERROR_UNCLOSED_CDATA_SECTION},
1785 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1786 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1787 XML_ERROR_PARTIAL_CHAR},
1788 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1789 XML_ERROR_UNCLOSED_CDATA_SECTION}};
1790 size_t i;
1791
1792 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1793 set_subtest("case %lu", (long unsigned)(i + 1));
1794 enum XML_Status actual_status;
1795 enum XML_Error actual_error;
1796
1797 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1798 XML_FALSE)
1799 == XML_STATUS_ERROR)
1800 xml_failure(g_parser);
1801 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1802 (int)cases[i].text_bytes, XML_TRUE);
1803 assert(actual_status == XML_STATUS_ERROR);
1804 actual_error = XML_GetErrorCode(g_parser);
1805 if (actual_error != cases[i].expected_error) {
1806 char message[1024];
1807
1808 snprintf(message, sizeof(message),
1809 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1810 ") for case %lu\n",
1811 cases[i].expected_error,
1812 XML_ErrorString(cases[i].expected_error), actual_error,
1813 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1814 fail(message);
1815 }
1816 XML_ParserReset(g_parser, NULL);
1817 }
1818 }
1819 END_TEST
1820
1821 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1822 START_TEST(test_stop_parser_between_cdata_calls) {
1823 const char *text = long_cdata_text;
1824
1825 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1826 g_resumable = XML_FALSE;
1827 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1828 }
1829 END_TEST
1830
1831 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1832 START_TEST(test_suspend_parser_between_cdata_calls) {
1833 const char *text = long_cdata_text;
1834 enum XML_Status result;
1835
1836 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1837 g_resumable = XML_TRUE;
1838 result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1839 if (result != XML_STATUS_SUSPENDED) {
1840 if (result == XML_STATUS_ERROR)
1841 xml_failure(g_parser);
1842 fail("Parse not suspended in CDATA handler");
1843 }
1844 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1845 xml_failure(g_parser);
1846 }
1847 END_TEST
1848
1849 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1850 START_TEST(test_memory_allocation) {
1851 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1852 char *p;
1853
1854 if (buffer == NULL) {
1855 fail("Allocation failed");
1856 } else {
1857 /* Try writing to memory; some OSes try to cheat! */
1858 buffer[0] = 'T';
1859 buffer[1] = 'E';
1860 buffer[2] = 'S';
1861 buffer[3] = 'T';
1862 buffer[4] = '\0';
1863 if (strcmp(buffer, "TEST") != 0) {
1864 fail("Memory not writable");
1865 } else {
1866 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1867 if (p == NULL) {
1868 fail("Reallocation failed");
1869 } else {
1870 /* Write again, just to be sure */
1871 buffer = p;
1872 buffer[0] = 'V';
1873 if (strcmp(buffer, "VEST") != 0) {
1874 fail("Reallocated memory not writable");
1875 }
1876 }
1877 }
1878 XML_MemFree(g_parser, buffer);
1879 }
1880 }
1881 END_TEST
1882
1883 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1884 START_TEST(test_default_current) {
1885 const char *text = "<doc>hell]</doc>";
1886 const char *entity_text = "<!DOCTYPE doc [\n"
1887 "<!ENTITY entity '%'>\n"
1888 "]>\n"
1889 "<doc>&entity;</doc>";
1890
1891 set_subtest("with defaulting");
1892 {
1893 struct handler_record_list storage;
1894 storage.count = 0;
1895 XML_SetDefaultHandler(g_parser, record_default_handler);
1896 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1897 XML_SetUserData(g_parser, &storage);
1898 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1899 == XML_STATUS_ERROR)
1900 xml_failure(g_parser);
1901 int i = 0;
1902 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1903 // we should have gotten one or more cdata callbacks, totaling 5 chars
1904 int cdata_len_remaining = 5;
1905 while (cdata_len_remaining > 0) {
1906 const struct handler_record_entry *c_entry
1907 = handler_record_get(&storage, i++);
1908 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1909 assert_true(c_entry->arg > 0);
1910 assert_true(c_entry->arg <= cdata_len_remaining);
1911 cdata_len_remaining -= c_entry->arg;
1912 // default handler must follow, with the exact same len argument.
1913 assert_record_handler_called(&storage, i++, "record_default_handler",
1914 c_entry->arg);
1915 }
1916 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1917 assert_true(storage.count == i);
1918 }
1919
1920 /* Again, without the defaulting */
1921 set_subtest("no defaulting");
1922 {
1923 struct handler_record_list storage;
1924 storage.count = 0;
1925 XML_ParserReset(g_parser, NULL);
1926 XML_SetDefaultHandler(g_parser, record_default_handler);
1927 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1928 XML_SetUserData(g_parser, &storage);
1929 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1930 == XML_STATUS_ERROR)
1931 xml_failure(g_parser);
1932 int i = 0;
1933 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1934 // we should have gotten one or more cdata callbacks, totaling 5 chars
1935 int cdata_len_remaining = 5;
1936 while (cdata_len_remaining > 0) {
1937 const struct handler_record_entry *c_entry
1938 = handler_record_get(&storage, i++);
1939 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1940 assert_true(c_entry->arg > 0);
1941 assert_true(c_entry->arg <= cdata_len_remaining);
1942 cdata_len_remaining -= c_entry->arg;
1943 }
1944 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1945 assert_true(storage.count == i);
1946 }
1947
1948 /* Now with an internal entity to complicate matters */
1949 set_subtest("with internal entity");
1950 {
1951 struct handler_record_list storage;
1952 storage.count = 0;
1953 XML_ParserReset(g_parser, NULL);
1954 XML_SetDefaultHandler(g_parser, record_default_handler);
1955 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1956 XML_SetUserData(g_parser, &storage);
1957 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1958 XML_TRUE)
1959 == XML_STATUS_ERROR)
1960 xml_failure(g_parser);
1961 /* The default handler suppresses the entity */
1962 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1963 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1964 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1965 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1966 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1967 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1968 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1969 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1970 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1971 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1972 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1973 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1974 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1975 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1976 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1977 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1978 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1979 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1980 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1981 assert_true(storage.count == 19);
1982 }
1983
1984 /* Again, with a skip handler */
1985 set_subtest("with skip handler");
1986 {
1987 struct handler_record_list storage;
1988 storage.count = 0;
1989 XML_ParserReset(g_parser, NULL);
1990 XML_SetDefaultHandler(g_parser, record_default_handler);
1991 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1992 XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1993 XML_SetUserData(g_parser, &storage);
1994 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1995 XML_TRUE)
1996 == XML_STATUS_ERROR)
1997 xml_failure(g_parser);
1998 /* The default handler suppresses the entity */
1999 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2000 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2001 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2002 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2003 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2004 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2005 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2006 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2007 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2008 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2009 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2010 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2011 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2012 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2013 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2014 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2015 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2016 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2017 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2018 assert_true(storage.count == 19);
2019 }
2020
2021 /* This time, allow the entity through */
2022 set_subtest("allow entity");
2023 {
2024 struct handler_record_list storage;
2025 storage.count = 0;
2026 XML_ParserReset(g_parser, NULL);
2027 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2028 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2029 XML_SetUserData(g_parser, &storage);
2030 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2031 XML_TRUE)
2032 == XML_STATUS_ERROR)
2033 xml_failure(g_parser);
2034 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2035 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2036 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2037 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2038 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2039 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2040 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2041 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2042 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2043 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2044 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2045 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2046 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2047 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2048 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2049 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2050 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2051 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2052 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2053 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2054 assert_true(storage.count == 20);
2055 }
2056
2057 /* Finally, without passing the cdata to the default handler */
2058 set_subtest("not passing cdata");
2059 {
2060 struct handler_record_list storage;
2061 storage.count = 0;
2062 XML_ParserReset(g_parser, NULL);
2063 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2064 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2065 XML_SetUserData(g_parser, &storage);
2066 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2067 XML_TRUE)
2068 == XML_STATUS_ERROR)
2069 xml_failure(g_parser);
2070 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2071 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2072 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2073 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2074 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2075 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2076 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2077 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2078 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2079 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2080 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2081 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2082 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2083 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2084 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2085 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2086 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2087 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2088 1);
2089 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2090 assert_true(storage.count == 19);
2091 }
2092 }
2093 END_TEST
2094
2095 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2096 START_TEST(test_dtd_elements) {
2097 const char *text = "<!DOCTYPE doc [\n"
2098 "<!ELEMENT doc (chapter)>\n"
2099 "<!ELEMENT chapter (#PCDATA)>\n"
2100 "]>\n"
2101 "<doc><chapter>Wombats are go</chapter></doc>";
2102
2103 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2104 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2105 == XML_STATUS_ERROR)
2106 xml_failure(g_parser);
2107 }
2108 END_TEST
2109
2110 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2111 element_decl_check_model(void *userData, const XML_Char *name,
2112 XML_Content *model) {
2113 UNUSED_P(userData);
2114 uint32_t errorFlags = 0;
2115
2116 /* Expected model array structure is this:
2117 * [0] (type 6, quant 0)
2118 * [1] (type 5, quant 0)
2119 * [3] (type 4, quant 0, name "bar")
2120 * [4] (type 4, quant 0, name "foo")
2121 * [5] (type 4, quant 3, name "xyz")
2122 * [2] (type 4, quant 2, name "zebra")
2123 */
2124 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2125 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2126
2127 if (model != NULL) {
2128 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2129 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2130 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2131 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2132 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2133
2134 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2135 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2136 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2137 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2138 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2139
2140 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2141 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2142 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2143 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2144 errorFlags
2145 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2146
2147 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2148 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2149 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2150 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2151 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2152
2153 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2154 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2155 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2156 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2157 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2158
2159 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2160 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2161 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2162 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2163 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2164 }
2165
2166 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2167 XML_FreeContentModel(g_parser, model);
2168 }
2169
START_TEST(test_dtd_elements_nesting)2170 START_TEST(test_dtd_elements_nesting) {
2171 // Payload inspired by a test in Perl's XML::Parser
2172 const char *text = "<!DOCTYPE foo [\n"
2173 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2174 "]>\n"
2175 "<foo/>";
2176
2177 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2178
2179 XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2180 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2181 == XML_STATUS_ERROR)
2182 xml_failure(g_parser);
2183
2184 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2185 fail("Element declaration model regression detected");
2186 }
2187 END_TEST
2188
2189 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2190 START_TEST(test_set_foreign_dtd) {
2191 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2192 const char *text2 = "<doc>&entity;</doc>";
2193 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2194
2195 /* Check hash salt is passed through too */
2196 XML_SetHashSalt(g_parser, 0x12345678);
2197 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2198 XML_SetUserData(g_parser, &test_data);
2199 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2200 /* Add a default handler to exercise more code paths */
2201 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2202 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2203 fail("Could not set foreign DTD");
2204 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2205 == XML_STATUS_ERROR)
2206 xml_failure(g_parser);
2207
2208 /* Ensure that trying to set the DTD after parsing has started
2209 * is faulted, even if it's the same setting.
2210 */
2211 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2212 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2213 fail("Failed to reject late foreign DTD setting");
2214 /* Ditto for the hash salt */
2215 if (XML_SetHashSalt(g_parser, 0x23456789))
2216 fail("Failed to reject late hash salt change");
2217
2218 /* Now finish the parse */
2219 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2220 == XML_STATUS_ERROR)
2221 xml_failure(g_parser);
2222 }
2223 END_TEST
2224
2225 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2226 START_TEST(test_foreign_dtd_not_standalone) {
2227 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2228 "<doc>&entity;</doc>";
2229 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2230
2231 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2232 XML_SetUserData(g_parser, &test_data);
2233 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2234 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2235 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2236 fail("Could not set foreign DTD");
2237 expect_failure(text, XML_ERROR_NOT_STANDALONE,
2238 "NotStandalonehandler failed to reject");
2239 }
2240 END_TEST
2241
2242 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2243 START_TEST(test_invalid_foreign_dtd) {
2244 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2245 "<doc>&entity;</doc>";
2246 ExtFaults test_data
2247 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2248
2249 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2250 XML_SetUserData(g_parser, &test_data);
2251 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2252 XML_UseForeignDTD(g_parser, XML_TRUE);
2253 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2254 "Bad DTD should not have been accepted");
2255 }
2256 END_TEST
2257
2258 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2259 START_TEST(test_foreign_dtd_with_doctype) {
2260 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2261 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2262 const char *text2 = "<doc>&entity;</doc>";
2263 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2264
2265 /* Check hash salt is passed through too */
2266 XML_SetHashSalt(g_parser, 0x12345678);
2267 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2268 XML_SetUserData(g_parser, &test_data);
2269 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2270 /* Add a default handler to exercise more code paths */
2271 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2272 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2273 fail("Could not set foreign DTD");
2274 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2275 == XML_STATUS_ERROR)
2276 xml_failure(g_parser);
2277
2278 /* Ensure that trying to set the DTD after parsing has started
2279 * is faulted, even if it's the same setting.
2280 */
2281 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2282 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2283 fail("Failed to reject late foreign DTD setting");
2284 /* Ditto for the hash salt */
2285 if (XML_SetHashSalt(g_parser, 0x23456789))
2286 fail("Failed to reject late hash salt change");
2287
2288 /* Now finish the parse */
2289 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2290 == XML_STATUS_ERROR)
2291 xml_failure(g_parser);
2292 }
2293 END_TEST
2294
2295 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2296 START_TEST(test_foreign_dtd_without_external_subset) {
2297 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2298 "<doc>&foo;</doc>";
2299
2300 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2301 XML_SetUserData(g_parser, NULL);
2302 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2303 XML_UseForeignDTD(g_parser, XML_TRUE);
2304 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2305 == XML_STATUS_ERROR)
2306 xml_failure(g_parser);
2307 }
2308 END_TEST
2309
START_TEST(test_empty_foreign_dtd)2310 START_TEST(test_empty_foreign_dtd) {
2311 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2312 "<doc>&entity;</doc>";
2313
2314 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2315 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2316 XML_UseForeignDTD(g_parser, XML_TRUE);
2317 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2318 "Undefined entity not faulted");
2319 }
2320 END_TEST
2321
2322 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2323 START_TEST(test_set_base) {
2324 const XML_Char *old_base;
2325 const XML_Char *new_base = XCS("/local/file/name.xml");
2326
2327 old_base = XML_GetBase(g_parser);
2328 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2329 fail("Unable to set base");
2330 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2331 fail("Base setting not correct");
2332 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2333 fail("Unable to NULL base");
2334 if (XML_GetBase(g_parser) != NULL)
2335 fail("Base setting not nulled");
2336 XML_SetBase(g_parser, old_base);
2337 }
2338 END_TEST
2339
2340 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2341 START_TEST(test_attributes) {
2342 const char *text = "<!DOCTYPE doc [\n"
2343 "<!ELEMENT doc (tag)>\n"
2344 "<!ATTLIST doc id ID #REQUIRED>\n"
2345 "]>"
2346 "<doc a='1' id='one' b='2'>"
2347 "<tag c='3'/>"
2348 "</doc>";
2349 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2350 {XCS("b"), XCS("2")},
2351 {XCS("id"), XCS("one")},
2352 {NULL, NULL}};
2353 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2354 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2355 {XCS("tag"), 1, NULL, NULL},
2356 {NULL, 0, NULL, NULL}};
2357 info[0].attributes = doc_info;
2358 info[1].attributes = tag_info;
2359
2360 XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2361 XML_SetUserData(g_parser, info);
2362 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2363 == XML_STATUS_ERROR)
2364 xml_failure(g_parser);
2365 }
2366 END_TEST
2367
2368 /* Test reset works correctly in the middle of processing an internal
2369 * entity. Exercises some obscure code in XML_ParserReset().
2370 */
START_TEST(test_reset_in_entity)2371 START_TEST(test_reset_in_entity) {
2372 const char *text = "<!DOCTYPE doc [\n"
2373 "<!ENTITY wombat 'wom'>\n"
2374 "<!ENTITY entity 'hi &wom; there'>\n"
2375 "]>\n"
2376 "<doc>&entity;</doc>";
2377 XML_ParsingStatus status;
2378
2379 g_resumable = XML_TRUE;
2380 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2381 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2382 == XML_STATUS_ERROR)
2383 xml_failure(g_parser);
2384 XML_GetParsingStatus(g_parser, &status);
2385 if (status.parsing != XML_SUSPENDED)
2386 fail("Parsing status not SUSPENDED");
2387 XML_ParserReset(g_parser, NULL);
2388 XML_GetParsingStatus(g_parser, &status);
2389 if (status.parsing != XML_INITIALIZED)
2390 fail("Parsing status doesn't reset to INITIALIZED");
2391 }
2392 END_TEST
2393
2394 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2395 START_TEST(test_resume_invalid_parse) {
2396 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2397
2398 g_resumable = XML_TRUE;
2399 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2400 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2401 == XML_STATUS_ERROR)
2402 xml_failure(g_parser);
2403 if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2404 fail("Resumed invalid parse not faulted");
2405 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2406 fail("Invalid parse not correctly faulted");
2407 }
2408 END_TEST
2409
2410 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2411 START_TEST(test_resume_resuspended) {
2412 const char *text = "<doc>Hello<meep/>world</doc>";
2413
2414 g_resumable = XML_TRUE;
2415 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2416 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2417 == XML_STATUS_ERROR)
2418 xml_failure(g_parser);
2419 g_resumable = XML_TRUE;
2420 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2421 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2422 fail("Resumption not suspended");
2423 /* This one should succeed and finish up */
2424 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2425 xml_failure(g_parser);
2426 }
2427 END_TEST
2428
2429 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2430 START_TEST(test_cdata_default) {
2431 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2432 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2433 CharData storage;
2434
2435 CharData_Init(&storage);
2436 XML_SetUserData(g_parser, &storage);
2437 XML_SetDefaultHandler(g_parser, accumulate_characters);
2438
2439 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2440 == XML_STATUS_ERROR)
2441 xml_failure(g_parser);
2442 CharData_CheckXMLChars(&storage, expected);
2443 }
2444 END_TEST
2445
2446 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2447 START_TEST(test_subordinate_reset) {
2448 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2449 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2450 "<doc>&entity;</doc>";
2451
2452 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2453 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2454 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2455 == XML_STATUS_ERROR)
2456 xml_failure(g_parser);
2457 }
2458 END_TEST
2459
2460 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2461 START_TEST(test_subordinate_suspend) {
2462 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2463 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2464 "<doc>&entity;</doc>";
2465
2466 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2467 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2468 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2469 == XML_STATUS_ERROR)
2470 xml_failure(g_parser);
2471 }
2472 END_TEST
2473
2474 /* Test suspending a subordinate parser from an XML declaration */
2475 /* Increases code coverage of the tests */
2476
START_TEST(test_subordinate_xdecl_suspend)2477 START_TEST(test_subordinate_xdecl_suspend) {
2478 const char *text
2479 = "<!DOCTYPE doc [\n"
2480 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2481 "]>\n"
2482 "<doc>&entity;</doc>";
2483
2484 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2485 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2486 g_resumable = XML_TRUE;
2487 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2488 == XML_STATUS_ERROR)
2489 xml_failure(g_parser);
2490 }
2491 END_TEST
2492
START_TEST(test_subordinate_xdecl_abort)2493 START_TEST(test_subordinate_xdecl_abort) {
2494 const char *text
2495 = "<!DOCTYPE doc [\n"
2496 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2497 "]>\n"
2498 "<doc>&entity;</doc>";
2499
2500 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2501 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2502 g_resumable = XML_FALSE;
2503 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2504 == XML_STATUS_ERROR)
2505 xml_failure(g_parser);
2506 }
2507 END_TEST
2508
2509 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2510 START_TEST(test_ext_entity_invalid_suspended_parse) {
2511 const char *text = "<!DOCTYPE doc [\n"
2512 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2513 "]>\n"
2514 "<doc>&en;</doc>";
2515 ExtFaults faults[]
2516 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2517 "Incomplete element declaration not faulted", NULL,
2518 XML_ERROR_UNCLOSED_TOKEN},
2519 {/* First two bytes of a three-byte char */
2520 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2521 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2522 {NULL, NULL, NULL, XML_ERROR_NONE}};
2523 ExtFaults *fault;
2524
2525 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2526 set_subtest("%s", fault->parse_text);
2527 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2528 XML_SetExternalEntityRefHandler(g_parser,
2529 external_entity_suspending_faulter);
2530 XML_SetUserData(g_parser, fault);
2531 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2532 "Parser did not report external entity error");
2533 XML_ParserReset(g_parser, NULL);
2534 }
2535 }
2536 END_TEST
2537
2538 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2539 START_TEST(test_explicit_encoding) {
2540 const char *text1 = "<doc>Hello ";
2541 const char *text2 = " World</doc>";
2542
2543 /* Just check that we can set the encoding to NULL before starting */
2544 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2545 fail("Failed to initialise encoding to NULL");
2546 /* Say we are UTF-8 */
2547 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2548 fail("Failed to set explicit encoding");
2549 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2550 == XML_STATUS_ERROR)
2551 xml_failure(g_parser);
2552 /* Try to switch encodings mid-parse */
2553 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2554 fail("Allowed encoding change");
2555 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2556 == XML_STATUS_ERROR)
2557 xml_failure(g_parser);
2558 /* Try now the parse is over */
2559 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2560 fail("Failed to unset encoding");
2561 }
2562 END_TEST
2563
2564 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2565 START_TEST(test_trailing_cr) {
2566 const char *text = "<doc>\r";
2567 int found_cr;
2568
2569 /* Try with a character handler, for code coverage */
2570 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2571 XML_SetUserData(g_parser, &found_cr);
2572 found_cr = 0;
2573 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2574 == XML_STATUS_OK)
2575 fail("Failed to fault unclosed doc");
2576 if (found_cr == 0)
2577 fail("Did not catch the carriage return");
2578 XML_ParserReset(g_parser, NULL);
2579
2580 /* Now with a default handler instead */
2581 XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2582 XML_SetUserData(g_parser, &found_cr);
2583 found_cr = 0;
2584 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2585 == XML_STATUS_OK)
2586 fail("Failed to fault unclosed doc");
2587 if (found_cr == 0)
2588 fail("Did not catch default carriage return");
2589 }
2590 END_TEST
2591
2592 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2593 START_TEST(test_ext_entity_trailing_cr) {
2594 const char *text = "<!DOCTYPE doc [\n"
2595 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2596 "]>\n"
2597 "<doc>&en;</doc>";
2598 int found_cr;
2599
2600 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2601 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2602 XML_SetUserData(g_parser, &found_cr);
2603 found_cr = 0;
2604 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2605 != XML_STATUS_OK)
2606 xml_failure(g_parser);
2607 if (found_cr == 0)
2608 fail("No carriage return found");
2609 XML_ParserReset(g_parser, NULL);
2610
2611 /* Try again with a different trailing CR */
2612 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2613 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2614 XML_SetUserData(g_parser, &found_cr);
2615 found_cr = 0;
2616 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2617 != XML_STATUS_OK)
2618 xml_failure(g_parser);
2619 if (found_cr == 0)
2620 fail("No carriage return found");
2621 }
2622 END_TEST
2623
2624 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2625 START_TEST(test_trailing_rsqb) {
2626 const char *text8 = "<doc>]";
2627 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2628 int found_rsqb;
2629 int text8_len = (int)strlen(text8);
2630
2631 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2632 XML_SetUserData(g_parser, &found_rsqb);
2633 found_rsqb = 0;
2634 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2635 == XML_STATUS_OK)
2636 fail("Failed to fault unclosed doc");
2637 if (found_rsqb == 0)
2638 fail("Did not catch the right square bracket");
2639
2640 /* Try again with a different encoding */
2641 XML_ParserReset(g_parser, NULL);
2642 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2643 XML_SetUserData(g_parser, &found_rsqb);
2644 found_rsqb = 0;
2645 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2646 XML_TRUE)
2647 == XML_STATUS_OK)
2648 fail("Failed to fault unclosed doc");
2649 if (found_rsqb == 0)
2650 fail("Did not catch the right square bracket");
2651
2652 /* And finally with a default handler */
2653 XML_ParserReset(g_parser, NULL);
2654 XML_SetDefaultHandler(g_parser, rsqb_handler);
2655 XML_SetUserData(g_parser, &found_rsqb);
2656 found_rsqb = 0;
2657 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2658 XML_TRUE)
2659 == XML_STATUS_OK)
2660 fail("Failed to fault unclosed doc");
2661 if (found_rsqb == 0)
2662 fail("Did not catch the right square bracket");
2663 }
2664 END_TEST
2665
2666 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2667 START_TEST(test_ext_entity_trailing_rsqb) {
2668 const char *text = "<!DOCTYPE doc [\n"
2669 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2670 "]>\n"
2671 "<doc>&en;</doc>";
2672 int found_rsqb;
2673
2674 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2675 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2676 XML_SetUserData(g_parser, &found_rsqb);
2677 found_rsqb = 0;
2678 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2679 != XML_STATUS_OK)
2680 xml_failure(g_parser);
2681 if (found_rsqb == 0)
2682 fail("No right square bracket found");
2683 }
2684 END_TEST
2685
2686 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2687 START_TEST(test_ext_entity_good_cdata) {
2688 const char *text = "<!DOCTYPE doc [\n"
2689 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2690 "]>\n"
2691 "<doc>&en;</doc>";
2692
2693 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2694 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2695 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2696 != XML_STATUS_OK)
2697 xml_failure(g_parser);
2698 }
2699 END_TEST
2700
2701 /* Test user parameter settings */
START_TEST(test_user_parameters)2702 START_TEST(test_user_parameters) {
2703 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2704 "<!-- Primary parse -->\n"
2705 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2706 "<doc>&entity;";
2707 const char *epilog = "<!-- Back to primary parser -->\n"
2708 "</doc>";
2709
2710 g_comment_count = 0;
2711 g_skip_count = 0;
2712 g_xdecl_count = 0;
2713 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2714 XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2715 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2716 XML_SetCommentHandler(g_parser, data_check_comment_handler);
2717 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2718 XML_UseParserAsHandlerArg(g_parser);
2719 XML_SetUserData(g_parser, (void *)1);
2720 g_handler_data = g_parser;
2721 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2722 == XML_STATUS_ERROR)
2723 xml_failure(g_parser);
2724 /* Ensure we can't change policy mid-parse */
2725 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2726 fail("Changed param entity parsing policy while parsing");
2727 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2728 == XML_STATUS_ERROR)
2729 xml_failure(g_parser);
2730 if (g_comment_count != 3)
2731 fail("Comment handler not invoked enough times");
2732 if (g_skip_count != 1)
2733 fail("Skip handler not invoked enough times");
2734 if (g_xdecl_count != 1)
2735 fail("XML declaration handler not invoked");
2736 }
2737 END_TEST
2738
2739 /* Test that an explicit external entity handler argument replaces
2740 * the parser as the first argument.
2741 *
2742 * We do not call the first parameter to the external entity handler
2743 * 'parser' for once, since the first time the handler is called it
2744 * will actually be a text string. We need to be able to access the
2745 * global 'parser' variable to create our external entity parser from,
2746 * since there are code paths we need to ensure get executed.
2747 */
START_TEST(test_ext_entity_ref_parameter)2748 START_TEST(test_ext_entity_ref_parameter) {
2749 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2750 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2751 "<doc>&entity;</doc>";
2752
2753 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2754 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2755 /* Set a handler arg that is not NULL and not parser (which is
2756 * what NULL would cause to be passed.
2757 */
2758 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2759 g_handler_data = text;
2760 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2761 == XML_STATUS_ERROR)
2762 xml_failure(g_parser);
2763
2764 /* Now try again with unset args */
2765 XML_ParserReset(g_parser, NULL);
2766 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2767 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2768 XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2769 g_handler_data = g_parser;
2770 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2771 == XML_STATUS_ERROR)
2772 xml_failure(g_parser);
2773 }
2774 END_TEST
2775
2776 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2777 START_TEST(test_empty_parse) {
2778 const char *text = "<doc></doc>";
2779 const char *partial = "<doc>";
2780
2781 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2782 fail("Parsing empty string faulted");
2783 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2784 fail("Parsing final empty string not faulted");
2785 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2786 fail("Parsing final empty string faulted for wrong reason");
2787
2788 /* Now try with valid text before the empty end */
2789 XML_ParserReset(g_parser, NULL);
2790 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2791 == XML_STATUS_ERROR)
2792 xml_failure(g_parser);
2793 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2794 fail("Parsing final empty string faulted");
2795
2796 /* Now try with invalid text before the empty end */
2797 XML_ParserReset(g_parser, NULL);
2798 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2799 XML_FALSE)
2800 == XML_STATUS_ERROR)
2801 xml_failure(g_parser);
2802 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2803 fail("Parsing final incomplete empty string not faulted");
2804 }
2805 END_TEST
2806
2807 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2808 START_TEST(test_negative_len_parse) {
2809 const char *const doc = "<root/>";
2810 for (int isFinal = 0; isFinal < 2; isFinal++) {
2811 set_subtest("isFinal=%d", isFinal);
2812
2813 XML_Parser parser = XML_ParserCreate(NULL);
2814
2815 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2816 fail("There was not supposed to be any initial parse error.");
2817
2818 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2819
2820 if (status != XML_STATUS_ERROR)
2821 fail("Negative len was expected to fail the parse but did not.");
2822
2823 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2824 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2825
2826 XML_ParserFree(parser);
2827 }
2828 }
2829 END_TEST
2830
2831 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2832 START_TEST(test_negative_len_parse_buffer) {
2833 const char *const doc = "<root/>";
2834 for (int isFinal = 0; isFinal < 2; isFinal++) {
2835 set_subtest("isFinal=%d", isFinal);
2836
2837 XML_Parser parser = XML_ParserCreate(NULL);
2838
2839 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2840 fail("There was not supposed to be any initial parse error.");
2841
2842 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2843
2844 if (buffer == NULL)
2845 fail("XML_GetBuffer failed.");
2846
2847 memcpy(buffer, doc, strlen(doc));
2848
2849 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2850
2851 if (status != XML_STATUS_ERROR)
2852 fail("Negative len was expected to fail the parse but did not.");
2853
2854 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2855 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2856
2857 XML_ParserFree(parser);
2858 }
2859 }
2860 END_TEST
2861
2862 /* Test odd corners of the XML_GetBuffer interface */
2863 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2864 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2865 const XML_Feature *feature = XML_GetFeatureList();
2866
2867 if (feature == NULL)
2868 return XML_STATUS_ERROR;
2869 for (; feature->feature != XML_FEATURE_END; feature++) {
2870 if (feature->feature == feature_id) {
2871 *presult = feature->value;
2872 return XML_STATUS_OK;
2873 }
2874 }
2875 return XML_STATUS_ERROR;
2876 }
2877
2878 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)2879 START_TEST(test_get_buffer_1) {
2880 const char *text = get_buffer_test_text;
2881 void *buffer;
2882 long context_bytes;
2883
2884 /* Attempt to allocate a negative length buffer */
2885 if (XML_GetBuffer(g_parser, -12) != NULL)
2886 fail("Negative length buffer not failed");
2887
2888 /* Now get a small buffer and extend it past valid length */
2889 buffer = XML_GetBuffer(g_parser, 1536);
2890 if (buffer == NULL)
2891 fail("1.5K buffer failed");
2892 assert(buffer != NULL);
2893 memcpy(buffer, text, strlen(text));
2894 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2895 == XML_STATUS_ERROR)
2896 xml_failure(g_parser);
2897 if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2898 fail("INT_MAX buffer not failed");
2899
2900 /* Now try extending it a more reasonable but still too large
2901 * amount. The allocator in XML_GetBuffer() doubles the buffer
2902 * size until it exceeds the requested amount or INT_MAX. If it
2903 * exceeds INT_MAX, it rejects the request, so we want a request
2904 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
2905 * with an extra byte just to ensure that the request is off any
2906 * boundary. The request will be inflated internally by
2907 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2908 * request.
2909 */
2910 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2911 context_bytes = 0;
2912 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2913 fail("INT_MAX- buffer not failed");
2914
2915 /* Now try extending it a carefully crafted amount */
2916 if (XML_GetBuffer(g_parser, 1000) == NULL)
2917 fail("1000 buffer failed");
2918 }
2919 END_TEST
2920
2921 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)2922 START_TEST(test_get_buffer_2) {
2923 const char *text = get_buffer_test_text;
2924 void *buffer;
2925
2926 /* Now get a decent buffer */
2927 buffer = XML_GetBuffer(g_parser, 1536);
2928 if (buffer == NULL)
2929 fail("1.5K buffer failed");
2930 assert(buffer != NULL);
2931 memcpy(buffer, text, strlen(text));
2932 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2933 == XML_STATUS_ERROR)
2934 xml_failure(g_parser);
2935
2936 /* Extend it, to catch a different code path */
2937 if (XML_GetBuffer(g_parser, 1024) == NULL)
2938 fail("1024 buffer failed");
2939 }
2940 END_TEST
2941
/* Regression test for the signed integer overflow of CVE-2022-23852.
 * Only built when XML_CONTEXT_BYTES is greater than zero.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  const char *const doc = "\n";
  const int expectedKeepValue = (int)strlen(doc);

  // After this call, variable "keep" in XML_GetBuffer will
  // have value expectedKeepValue
  if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
                              XML_FALSE /* isFinal */)
      == XML_STATUS_ERROR) {
    xml_failure(parser);
  }

  assert(expectedKeepValue > 0);
  // A request this close to INT_MAX must be refused
  if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) {
    fail("enlarging buffer not failed");
  }

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2966
START_TEST(test_buffer_can_grow_to_max)2967 START_TEST(test_buffer_can_grow_to_max) {
2968 const char *const prefixes[] = {
2969 "",
2970 "<",
2971 "<x a='",
2972 "<doc><x a='",
2973 "<document><x a='",
2974 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2975 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2976 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2977 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2978 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2979 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2980 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2981 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2982 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2983 // Can we make a big allocation?
2984 void *big = malloc(maxbuf);
2985 if (! big) {
2986 // The big allocation failed. Let's be a little lenient.
2987 maxbuf = maxbuf / 2;
2988 }
2989 free(big);
2990 #endif
2991
2992 for (int i = 0; i < num_prefixes; ++i) {
2993 set_subtest("\"%s\"", prefixes[i]);
2994 XML_Parser parser = XML_ParserCreate(NULL);
2995 const int prefix_len = (int)strlen(prefixes[i]);
2996 const enum XML_Status s
2997 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2998 if (s != XML_STATUS_OK)
2999 xml_failure(parser);
3000
3001 // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3002 // subtracting the whole prefix is easiest, and close enough.
3003 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3004 // The limit should be consistent; no prefix should allow us to
3005 // reach above the max buffer size.
3006 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3007 XML_ParserFree(parser);
3008 }
3009 }
3010 END_TEST
3011
START_TEST(test_getbuffer_allocates_on_zero_len)3012 START_TEST(test_getbuffer_allocates_on_zero_len) {
3013 for (int first_len = 1; first_len >= 0; first_len--) {
3014 set_subtest("with len=%d first", first_len);
3015 XML_Parser parser = XML_ParserCreate(NULL);
3016 assert_true(parser != NULL);
3017 assert_true(XML_GetBuffer(parser, first_len) != NULL);
3018 assert_true(XML_GetBuffer(parser, 0) != NULL);
3019 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3020 xml_failure(parser);
3021 XML_ParserFree(parser);
3022 }
3023 }
3024 END_TEST
3025
3026 /* Test position information macros */
START_TEST(test_byte_info_at_end)3027 START_TEST(test_byte_info_at_end) {
3028 const char *text = "<doc></doc>";
3029
3030 if (XML_GetCurrentByteIndex(g_parser) != -1
3031 || XML_GetCurrentByteCount(g_parser) != 0)
3032 fail("Byte index/count incorrect at start of parse");
3033 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3034 == XML_STATUS_ERROR)
3035 xml_failure(g_parser);
3036 /* At end, the count will be zero and the index the end of string */
3037 if (XML_GetCurrentByteCount(g_parser) != 0)
3038 fail("Terminal byte count incorrect");
3039 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3040 fail("Terminal byte index incorrect");
3041 }
3042 END_TEST
3043
3044 /* Test position information from errors */
3045 #define PRE_ERROR_STR "<doc></"
3046 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3047 START_TEST(test_byte_info_at_error) {
3048 const char *text = PRE_ERROR_STR POST_ERROR_STR;
3049
3050 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3051 == XML_STATUS_OK)
3052 fail("Syntax error not faulted");
3053 if (XML_GetCurrentByteCount(g_parser) != 0)
3054 fail("Error byte count incorrect");
3055 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3056 fail("Error byte index incorrect");
3057 }
3058 END_TEST
3059 #undef PRE_ERROR_STR
3060 #undef POST_ERROR_STR
3061
3062 /* Test position information in handler */
3063 #define START_ELEMENT "<e>"
3064 #define CDATA_TEXT "Hello"
3065 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3066 START_TEST(test_byte_info_at_cdata) {
3067 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3068 int offset, size;
3069 ByteTestData data;
3070
3071 /* Check initial context is empty */
3072 if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3073 fail("Unexpected context at start of parse");
3074
3075 data.start_element_len = (int)strlen(START_ELEMENT);
3076 data.cdata_len = (int)strlen(CDATA_TEXT);
3077 data.total_string_len = (int)strlen(text);
3078 XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3079 XML_SetUserData(g_parser, &data);
3080 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3081 xml_failure(g_parser);
3082 }
3083 END_TEST
3084 #undef START_ELEMENT
3085 #undef CDATA_TEXT
3086 #undef END_ELEMENT
3087
3088 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3089 START_TEST(test_predefined_entities) {
3090 const char *text = "<doc><>&"'</doc>";
3091 const XML_Char *expected = XCS("<doc><>&"'</doc>");
3092 const XML_Char *result = XCS("<>&\"'");
3093 CharData storage;
3094
3095 XML_SetDefaultHandler(g_parser, accumulate_characters);
3096 /* run_character_check uses XML_SetCharacterDataHandler(), which
3097 * unfortunately heads off a code path that we need to exercise.
3098 */
3099 CharData_Init(&storage);
3100 XML_SetUserData(g_parser, &storage);
3101 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3102 == XML_STATUS_ERROR)
3103 xml_failure(g_parser);
3104 /* The default handler doesn't translate the entities */
3105 CharData_CheckXMLChars(&storage, expected);
3106
3107 /* Now try again and check the translation */
3108 XML_ParserReset(g_parser, NULL);
3109 run_character_check(text, result);
3110 }
3111 END_TEST
3112
3113 /* Regression test that an invalid tag in an external parameter
3114 * reference in an external DTD is correctly faulted.
3115 *
3116 * Only a few specific tags are legal in DTDs ignoring comments and
3117 * processing instructions, all of which begin with an exclamation
3118 * mark. "<el/>" is not one of them, so the parser should raise an
3119 * error on encountering it.
3120 */
START_TEST(test_invalid_tag_in_dtd)3121 START_TEST(test_invalid_tag_in_dtd) {
3122 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3123 "<doc></doc>\n";
3124
3125 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3126 XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3127 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3128 "Invalid tag IN DTD external param not rejected");
3129 }
3130 END_TEST
3131
3132 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3133 START_TEST(test_not_predefined_entities) {
3134 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3135 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3136 int i = 0;
3137
3138 while (text[i] != NULL) {
3139 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3140 "Undefined entity not rejected");
3141 XML_ParserReset(g_parser, NULL);
3142 i++;
3143 }
3144 }
3145 END_TEST
3146
3147 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3148 START_TEST(test_ignore_section) {
3149 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3150 "<doc><e>&entity;</e></doc>";
3151 const XML_Char *expected
3152 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3153 CharData storage;
3154
3155 CharData_Init(&storage);
3156 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3157 XML_SetUserData(g_parser, &storage);
3158 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3159 XML_SetDefaultHandler(g_parser, accumulate_characters);
3160 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3161 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3162 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3163 XML_SetStartElementHandler(g_parser, dummy_start_element);
3164 XML_SetEndElementHandler(g_parser, dummy_end_element);
3165 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3166 == XML_STATUS_ERROR)
3167 xml_failure(g_parser);
3168 CharData_CheckXMLChars(&storage, expected);
3169 }
3170 END_TEST
3171
START_TEST(test_ignore_section_utf16)3172 START_TEST(test_ignore_section_utf16) {
3173 const char text[] =
3174 /* <!DOCTYPE d SYSTEM 's'> */
3175 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3176 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3177 /* <d><e>&en;</e></d> */
3178 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3179 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3180 CharData storage;
3181
3182 CharData_Init(&storage);
3183 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3184 XML_SetUserData(g_parser, &storage);
3185 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3186 XML_SetDefaultHandler(g_parser, accumulate_characters);
3187 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3188 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3189 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3190 XML_SetStartElementHandler(g_parser, dummy_start_element);
3191 XML_SetEndElementHandler(g_parser, dummy_end_element);
3192 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3193 == XML_STATUS_ERROR)
3194 xml_failure(g_parser);
3195 CharData_CheckXMLChars(&storage, expected);
3196 }
3197 END_TEST
3198
START_TEST(test_ignore_section_utf16_be)3199 START_TEST(test_ignore_section_utf16_be) {
3200 const char text[] =
3201 /* <!DOCTYPE d SYSTEM 's'> */
3202 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3203 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3204 /* <d><e>&en;</e></d> */
3205 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3206 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3207 CharData storage;
3208
3209 CharData_Init(&storage);
3210 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3211 XML_SetUserData(g_parser, &storage);
3212 XML_SetExternalEntityRefHandler(g_parser,
3213 external_entity_load_ignore_utf16_be);
3214 XML_SetDefaultHandler(g_parser, accumulate_characters);
3215 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3216 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3217 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3218 XML_SetStartElementHandler(g_parser, dummy_start_element);
3219 XML_SetEndElementHandler(g_parser, dummy_end_element);
3220 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3221 == XML_STATUS_ERROR)
3222 xml_failure(g_parser);
3223 CharData_CheckXMLChars(&storage, expected);
3224 }
3225 END_TEST
3226
3227 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3228 START_TEST(test_bad_ignore_section) {
3229 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3230 "<doc><e>&entity;</e></doc>";
3231 ExtFaults faults[]
3232 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3233 XML_ERROR_SYNTAX},
3234 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3235 XML_ERROR_INVALID_TOKEN},
3236 {/* FIrst two bytes of a three-byte char */
3237 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3238 XML_ERROR_PARTIAL_CHAR},
3239 {NULL, NULL, NULL, XML_ERROR_NONE}};
3240 ExtFaults *fault;
3241
3242 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3243 set_subtest("%s", fault->parse_text);
3244 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3245 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3246 XML_SetUserData(g_parser, fault);
3247 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3248 "Incomplete IGNORE section not failed");
3249 XML_ParserReset(g_parser, NULL);
3250 }
3251 }
3252 END_TEST
3253
/* User data shared between test_external_bom_consumed and its external
 * entity handler, external_bom_checker.
 */
struct bom_testdata {
  /* Text the handler feeds to the sub-parser for system ID "004-2.ent". */
  const char *external;
  /* Number of leading bytes of `external` passed in the first, non-final
   * parse call; the remainder is passed in a second, final call. */
  int split;
  /* Set to XML_TRUE by the handler once it has been invoked for
   * "004-2.ent"; the test fails if it is still XML_FALSE afterwards. */
  XML_Bool nested_callback_happened;
};
3259
3260 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3261 external_bom_checker(XML_Parser parser, const XML_Char *context,
3262 const XML_Char *base, const XML_Char *systemId,
3263 const XML_Char *publicId) {
3264 const char *text;
3265 UNUSED_P(base);
3266 UNUSED_P(systemId);
3267 UNUSED_P(publicId);
3268
3269 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3270 if (ext_parser == NULL)
3271 fail("Could not create external entity parser");
3272
3273 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3274 struct bom_testdata *const testdata
3275 = (struct bom_testdata *)XML_GetUserData(parser);
3276 const char *const external = testdata->external;
3277 const int split = testdata->split;
3278 testdata->nested_callback_happened = XML_TRUE;
3279
3280 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3281 != XML_STATUS_OK) {
3282 xml_failure(ext_parser);
3283 }
3284 text = external + split; // the parse below will continue where we left off.
3285 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3286 text = "<!ELEMENT doc EMPTY>\n"
3287 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3288 "<!ENTITY % e2 '%e1;'>\n";
3289 } else {
3290 fail("unknown systemId");
3291 }
3292
3293 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3294 != XML_STATUS_OK)
3295 xml_failure(ext_parser);
3296
3297 XML_ParserFree(ext_parser);
3298 return XML_STATUS_OK;
3299 }
3300
3301 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3302 START_TEST(test_external_bom_consumed) {
3303 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3304 "<doc></doc>\n";
3305 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3306 const int len = (int)strlen(external);
3307 for (int split = 0; split <= len; ++split) {
3308 set_subtest("split at byte %d", split);
3309
3310 struct bom_testdata testdata;
3311 testdata.external = external;
3312 testdata.split = split;
3313 testdata.nested_callback_happened = XML_FALSE;
3314
3315 XML_Parser parser = XML_ParserCreate(NULL);
3316 if (parser == NULL) {
3317 fail("Couldn't create parser");
3318 }
3319 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3320 XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3321 XML_SetUserData(parser, &testdata);
3322 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3323 == XML_STATUS_ERROR)
3324 xml_failure(parser);
3325 if (! testdata.nested_callback_happened) {
3326 fail("ref handler not called");
3327 }
3328 XML_ParserFree(parser);
3329 }
3330 }
3331 END_TEST
3332
3333 /* Test recursive parsing */
START_TEST(test_external_entity_values)3334 START_TEST(test_external_entity_values) {
3335 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3336 "<doc></doc>\n";
3337 ExtFaults data_004_2[] = {
3338 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3339 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3340 XML_ERROR_INVALID_TOKEN},
3341 {"'wombat", "Unterminated string not faulted", NULL,
3342 XML_ERROR_UNCLOSED_TOKEN},
3343 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3344 XML_ERROR_PARTIAL_CHAR},
3345 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3346 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3347 XML_ERROR_XML_DECL},
3348 {/* UTF-8 BOM */
3349 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3350 XML_ERROR_NONE},
3351 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3352 "Invalid token after text declaration not faulted", NULL,
3353 XML_ERROR_INVALID_TOKEN},
3354 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3355 "Unterminated string after text decl not faulted", NULL,
3356 XML_ERROR_UNCLOSED_TOKEN},
3357 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3358 "Partial UTF-8 character after text decl not faulted", NULL,
3359 XML_ERROR_PARTIAL_CHAR},
3360 {"%e1;", "Recursive parameter entity not faulted", NULL,
3361 XML_ERROR_RECURSIVE_ENTITY_REF},
3362 {NULL, NULL, NULL, XML_ERROR_NONE}};
3363 int i;
3364
3365 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3366 set_subtest("%s", data_004_2[i].parse_text);
3367 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3368 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3369 XML_SetUserData(g_parser, &data_004_2[i]);
3370 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3371 == XML_STATUS_ERROR)
3372 xml_failure(g_parser);
3373 XML_ParserReset(g_parser, NULL);
3374 }
3375 }
3376 END_TEST
3377
3378 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3379 START_TEST(test_ext_entity_not_standalone) {
3380 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3381 "<doc></doc>";
3382
3383 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3384 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3385 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3386 "Standalone rejection not caught");
3387 }
3388 END_TEST
3389
START_TEST(test_ext_entity_value_abort)3390 START_TEST(test_ext_entity_value_abort) {
3391 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3392 "<doc></doc>\n";
3393
3394 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3395 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3396 g_resumable = XML_FALSE;
3397 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3398 == XML_STATUS_ERROR)
3399 xml_failure(g_parser);
3400 }
3401 END_TEST
3402
START_TEST(test_bad_public_doctype)3403 START_TEST(test_bad_public_doctype) {
3404 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3405 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3406 "<doc></doc>";
3407
3408 /* Setting a handler provokes a particular code path */
3409 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3410 dummy_end_doctype_handler);
3411 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3412 }
3413 END_TEST
3414
3415 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3416 START_TEST(test_attribute_enum_value) {
3417 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3418 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3419 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3420 ExtTest dtd_data
3421 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3422 "<!ELEMENT a EMPTY>\n"
3423 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3424 NULL, NULL};
3425 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3426
3427 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3428 XML_SetUserData(g_parser, &dtd_data);
3429 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3430 /* An attribute list handler provokes a different code path */
3431 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3432 run_ext_character_check(text, &dtd_data, expected);
3433 }
3434 END_TEST
3435
3436 /* Slightly bizarrely, the library seems to silently ignore entity
3437 * definitions for predefined entities, even when they are wrong. The
3438 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3439 * to happen, so this is currently treated as acceptable.
3440 */
START_TEST(test_predefined_entity_redefinition)3441 START_TEST(test_predefined_entity_redefinition) {
3442 const char *text = "<!DOCTYPE doc [\n"
3443 "<!ENTITY apos 'foo'>\n"
3444 "]>\n"
3445 "<doc>'</doc>";
3446 run_character_check(text, XCS("'"));
3447 }
3448 END_TEST
3449
3450 /* Test that the parser stops processing the DTD after an unresolved
3451 * parameter entity is encountered.
3452 */
START_TEST(test_dtd_stop_processing)3453 START_TEST(test_dtd_stop_processing) {
3454 const char *text = "<!DOCTYPE doc [\n"
3455 "%foo;\n"
3456 "<!ENTITY bar 'bas'>\n"
3457 "]><doc/>";
3458
3459 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3460 init_dummy_handlers();
3461 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3462 == XML_STATUS_ERROR)
3463 xml_failure(g_parser);
3464 if (get_dummy_handler_flags() != 0)
3465 fail("DTD processing still going after undefined PE");
3466 }
3467 END_TEST
3468
3469 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3470 START_TEST(test_public_notation_no_sysid) {
3471 const char *text = "<!DOCTYPE doc [\n"
3472 "<!NOTATION note PUBLIC 'foo'>\n"
3473 "<!ELEMENT doc EMPTY>\n"
3474 "]>\n<doc/>";
3475
3476 init_dummy_handlers();
3477 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3478 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3479 == XML_STATUS_ERROR)
3480 xml_failure(g_parser);
3481 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3482 fail("Notation declaration handler not called");
3483 }
3484 END_TEST
3485
START_TEST(test_nested_groups)3486 START_TEST(test_nested_groups) {
3487 const char *text
3488 = "<!DOCTYPE doc [\n"
3489 "<!ELEMENT doc "
3490 /* Sixteen elements per line */
3491 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3492 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3493 "))))))))))))))))))))))))))))))))>\n"
3494 "<!ELEMENT e EMPTY>"
3495 "]>\n"
3496 "<doc><e/></doc>";
3497 CharData storage;
3498
3499 CharData_Init(&storage);
3500 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3501 XML_SetStartElementHandler(g_parser, record_element_start_handler);
3502 XML_SetUserData(g_parser, &storage);
3503 init_dummy_handlers();
3504 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3505 == XML_STATUS_ERROR)
3506 xml_failure(g_parser);
3507 CharData_CheckXMLChars(&storage, XCS("doce"));
3508 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3509 fail("Element handler not fired");
3510 }
3511 END_TEST
3512
START_TEST(test_group_choice)3513 START_TEST(test_group_choice) {
3514 const char *text = "<!DOCTYPE doc [\n"
3515 "<!ELEMENT doc (a|b|c)+>\n"
3516 "<!ELEMENT a EMPTY>\n"
3517 "<!ELEMENT b (#PCDATA)>\n"
3518 "<!ELEMENT c ANY>\n"
3519 "]>\n"
3520 "<doc>\n"
3521 "<a/>\n"
3522 "<b attr='foo'>This is a foo</b>\n"
3523 "<c></c>\n"
3524 "</doc>\n";
3525
3526 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3527 init_dummy_handlers();
3528 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3529 == XML_STATUS_ERROR)
3530 xml_failure(g_parser);
3531 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3532 fail("Element handler flag not raised");
3533 }
3534 END_TEST
3535
START_TEST(test_standalone_parameter_entity)3536 START_TEST(test_standalone_parameter_entity) {
3537 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3538 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3539 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3540 "%entity;\n"
3541 "]>\n"
3542 "<doc></doc>";
3543 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3544
3545 XML_SetUserData(g_parser, dtd_data);
3546 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3547 XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3548 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3549 == XML_STATUS_ERROR)
3550 xml_failure(g_parser);
3551 }
3552 END_TEST
3553
3554 /* Test skipping of parameter entity in an external DTD */
3555 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3556 START_TEST(test_skipped_parameter_entity) {
3557 const char *text = "<?xml version='1.0'?>\n"
3558 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3559 "<!ELEMENT root (#PCDATA|a)* >\n"
3560 "]>\n"
3561 "<root></root>";
3562 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3563
3564 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3565 XML_SetUserData(g_parser, &dtd_data);
3566 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3567 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3568 init_dummy_handlers();
3569 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3570 == XML_STATUS_ERROR)
3571 xml_failure(g_parser);
3572 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3573 fail("Skip handler not executed");
3574 }
3575 END_TEST
3576
3577 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3578 START_TEST(test_recursive_external_parameter_entity) {
3579 const char *text = "<?xml version='1.0'?>\n"
3580 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3581 "<!ELEMENT root (#PCDATA|a)* >\n"
3582 "]>\n"
3583 "<root></root>";
3584 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;",
3585 "Recursive external parameter entity not faulted", NULL,
3586 XML_ERROR_RECURSIVE_ENTITY_REF};
3587
3588 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3589 XML_SetUserData(g_parser, &dtd_data);
3590 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3591 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3592 "Recursive external parameter not spotted");
3593 }
3594 END_TEST
3595
3596 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3597 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3598 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3599 "<doc></doc>\n";
3600
3601 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3602 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3603 XML_SetUserData(g_parser, NULL);
3604 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3605 == XML_STATUS_ERROR)
3606 xml_failure(g_parser);
3607
3608 /* Now repeat without the external entity ref handler invoking
3609 * another copy of itself.
3610 */
3611 XML_ParserReset(g_parser, NULL);
3612 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3613 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3614 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3615 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3616 == XML_STATUS_ERROR)
3617 xml_failure(g_parser);
3618 }
3619 END_TEST
3620
3621 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3622 START_TEST(test_suspend_xdecl) {
3623 const char *text = long_character_data_text;
3624
3625 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3626 XML_SetUserData(g_parser, g_parser);
3627 g_resumable = XML_TRUE;
3628 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3629 != XML_STATUS_SUSPENDED)
3630 xml_failure(g_parser);
3631 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3632 xml_failure(g_parser);
3633 /* Attempt to start a new parse while suspended */
3634 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3635 != XML_STATUS_ERROR)
3636 fail("Attempt to parse while suspended not faulted");
3637 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3638 fail("Suspended parse not faulted with correct error");
3639 }
3640 END_TEST
3641
3642 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3643 START_TEST(test_abort_epilog) {
3644 const char *text = "<doc></doc>\n\r\n";
3645 XML_Char trigger_char = XCS('\r');
3646
3647 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3648 XML_SetUserData(g_parser, &trigger_char);
3649 g_resumable = XML_FALSE;
3650 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3651 != XML_STATUS_ERROR)
3652 fail("Abort not triggered");
3653 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3654 xml_failure(g_parser);
3655 }
3656 END_TEST
3657
3658 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3659 START_TEST(test_abort_epilog_2) {
3660 const char *text = "<doc></doc>\n";
3661 XML_Char trigger_char = XCS('\n');
3662
3663 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3664 XML_SetUserData(g_parser, &trigger_char);
3665 g_resumable = XML_FALSE;
3666 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3667 }
3668 END_TEST
3669
3670 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3671 START_TEST(test_suspend_epilog) {
3672 const char *text = "<doc></doc>\n";
3673 XML_Char trigger_char = XCS('\n');
3674
3675 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3676 XML_SetUserData(g_parser, &trigger_char);
3677 g_resumable = XML_TRUE;
3678 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3679 != XML_STATUS_SUSPENDED)
3680 xml_failure(g_parser);
3681 }
3682 END_TEST
3683
START_TEST(test_suspend_in_sole_empty_tag)3684 START_TEST(test_suspend_in_sole_empty_tag) {
3685 const char *text = "<doc/>";
3686 enum XML_Status rc;
3687
3688 XML_SetEndElementHandler(g_parser, suspending_end_handler);
3689 XML_SetUserData(g_parser, g_parser);
3690 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3691 if (rc == XML_STATUS_ERROR)
3692 xml_failure(g_parser);
3693 else if (rc != XML_STATUS_SUSPENDED)
3694 fail("Suspend not triggered");
3695 rc = XML_ResumeParser(g_parser);
3696 if (rc == XML_STATUS_ERROR)
3697 xml_failure(g_parser);
3698 else if (rc != XML_STATUS_OK)
3699 fail("Resume failed");
3700 }
3701 END_TEST
3702
START_TEST(test_unfinished_epilog)3703 START_TEST(test_unfinished_epilog) {
3704 const char *text = "<doc></doc><";
3705
3706 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3707 "Incomplete epilog entry not faulted");
3708 }
3709 END_TEST
3710
START_TEST(test_partial_char_in_epilog)3711 START_TEST(test_partial_char_in_epilog) {
3712 const char *text = "<doc></doc>\xe2\x82";
3713
3714 /* First check that no fault is raised if the parse is not finished */
3715 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3716 == XML_STATUS_ERROR)
3717 xml_failure(g_parser);
3718 /* Now check that it is faulted once we finish */
3719 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3720 fail("Partial character in epilog not faulted");
3721 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3722 xml_failure(g_parser);
3723 }
3724 END_TEST
3725
3726 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3727 START_TEST(test_suspend_resume_internal_entity) {
3728 const char *text
3729 = "<!DOCTYPE doc [\n"
3730 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3731 "]>\n"
3732 "<doc>&foo;</doc>\n";
3733 const XML_Char *expected1 = XCS("Hi");
3734 const XML_Char *expected2 = XCS("HiHo");
3735 CharData storage;
3736
3737 CharData_Init(&storage);
3738 XML_SetStartElementHandler(g_parser, start_element_suspender);
3739 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3740 XML_SetUserData(g_parser, &storage);
3741 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3742 // we won't know exactly how much input we actually managed to give Expat.
3743 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3744 != XML_STATUS_SUSPENDED)
3745 xml_failure(g_parser);
3746 CharData_CheckXMLChars(&storage, XCS(""));
3747 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3748 xml_failure(g_parser);
3749 CharData_CheckXMLChars(&storage, expected1);
3750 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3751 xml_failure(g_parser);
3752 CharData_CheckXMLChars(&storage, expected2);
3753 }
3754 END_TEST
3755
START_TEST(test_suspend_resume_internal_entity_issue_629)3756 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3757 const char *const text
3758 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3759 "<"
3760 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3761 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3762 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3763 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3764 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3765 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3766 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3767 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3768 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3769 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3770 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3771 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3772 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3773 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3774 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3775 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3776 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3777 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3778 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3779 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3780 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3781 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3782 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3783 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3784 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3785 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3786 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3787 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3788 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3789 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3790 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3791 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3792 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3793 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3794 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3795 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3796 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3797 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3798 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3799 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3800 "/>"
3801 "</b></a>";
3802 const size_t firstChunkSizeBytes = 54;
3803
3804 XML_Parser parser = XML_ParserCreate(NULL);
3805 XML_SetUserData(parser, parser);
3806 XML_SetCommentHandler(parser, suspending_comment_handler);
3807
3808 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3809 != XML_STATUS_SUSPENDED)
3810 xml_failure(parser);
3811 if (XML_ResumeParser(parser) != XML_STATUS_OK)
3812 xml_failure(parser);
3813 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3814 (int)(strlen(text) - firstChunkSizeBytes),
3815 XML_TRUE)
3816 != XML_STATUS_OK)
3817 xml_failure(parser);
3818 XML_ParserFree(parser);
3819 }
3820 END_TEST
3821
3822 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3823 START_TEST(test_resume_entity_with_syntax_error) {
3824 const char *text = "<!DOCTYPE doc [\n"
3825 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3826 "]>\n"
3827 "<doc>&foo;</doc>\n";
3828
3829 XML_SetStartElementHandler(g_parser, start_element_suspender);
3830 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3831 != XML_STATUS_SUSPENDED)
3832 xml_failure(g_parser);
3833 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3834 fail("Syntax error in entity not faulted");
3835 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3836 xml_failure(g_parser);
3837 }
3838 END_TEST
3839
3840 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3841 START_TEST(test_suspend_resume_parameter_entity) {
3842 const char *text = "<!DOCTYPE doc [\n"
3843 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3844 "%foo;\n"
3845 "]>\n"
3846 "<doc>Hello, world</doc>";
3847 const XML_Char *expected = XCS("Hello, world");
3848 CharData storage;
3849
3850 CharData_Init(&storage);
3851 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3852 XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3853 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3854 XML_SetUserData(g_parser, &storage);
3855 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3856 != XML_STATUS_SUSPENDED)
3857 xml_failure(g_parser);
3858 CharData_CheckXMLChars(&storage, XCS(""));
3859 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3860 xml_failure(g_parser);
3861 CharData_CheckXMLChars(&storage, expected);
3862 }
3863 END_TEST
3864
3865 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)3866 START_TEST(test_restart_on_error) {
3867 const char *text = "<$doc><doc></doc>";
3868
3869 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3870 != XML_STATUS_ERROR)
3871 fail("Invalid tag name not faulted");
3872 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3873 xml_failure(g_parser);
3874 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3875 fail("Restarting invalid parse not faulted");
3876 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3877 xml_failure(g_parser);
3878 }
3879 END_TEST
3880
3881 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)3882 START_TEST(test_reject_lt_in_attribute_value) {
3883 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3884 "<doc></doc>";
3885
3886 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3887 "Bad attribute default not faulted");
3888 }
3889 END_TEST
3890
START_TEST(test_reject_unfinished_param_in_att_value)3891 START_TEST(test_reject_unfinished_param_in_att_value) {
3892 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3893 "<doc></doc>";
3894
3895 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3896 "Bad attribute default not faulted");
3897 }
3898 END_TEST
3899
START_TEST(test_trailing_cr_in_att_value)3900 START_TEST(test_trailing_cr_in_att_value) {
3901 const char *text = "<doc a='value\r'/>";
3902
3903 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3904 == XML_STATUS_ERROR)
3905 xml_failure(g_parser);
3906 }
3907 END_TEST
3908
3909 /* Try parsing a general entity within a parameter entity in a
3910 * standalone internal DTD. Covers a corner case in the parser.
3911 */
START_TEST(test_standalone_internal_entity)3912 START_TEST(test_standalone_internal_entity) {
3913 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3914 "<!DOCTYPE doc [\n"
3915 " <!ELEMENT doc (#PCDATA)>\n"
3916 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n"
3917 " <!ENTITY ge 'AttDefaultValue'>\n"
3918 " %pe;\n"
3919 "]>\n"
3920 "<doc att2='any'/>";
3921
3922 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3923 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3924 == XML_STATUS_ERROR)
3925 xml_failure(g_parser);
3926 }
3927 END_TEST
3928
3929 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)3930 START_TEST(test_skipped_external_entity) {
3931 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3932 "<doc></doc>\n";
3933 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3934 "<!ENTITY % e2 '%e1;'>\n",
3935 NULL, NULL};
3936
3937 XML_SetUserData(g_parser, &test_data);
3938 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3939 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3940 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3941 == XML_STATUS_ERROR)
3942 xml_failure(g_parser);
3943 }
3944 END_TEST
3945
3946 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)3947 START_TEST(test_skipped_null_loaded_ext_entity) {
3948 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3949 "<doc />";
3950 ExtHdlrData test_data
3951 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3952 "<!ENTITY % pe2 '%pe1;'>\n"
3953 "%pe2;\n",
3954 external_entity_null_loader};
3955
3956 XML_SetUserData(g_parser, &test_data);
3957 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3958 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3959 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3960 == XML_STATUS_ERROR)
3961 xml_failure(g_parser);
3962 }
3963 END_TEST
3964
START_TEST(test_skipped_unloaded_ext_entity)3965 START_TEST(test_skipped_unloaded_ext_entity) {
3966 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3967 "<doc />";
3968 ExtHdlrData test_data
3969 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3970 "<!ENTITY % pe2 '%pe1;'>\n"
3971 "%pe2;\n",
3972 NULL};
3973
3974 XML_SetUserData(g_parser, &test_data);
3975 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3976 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3977 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3978 == XML_STATUS_ERROR)
3979 xml_failure(g_parser);
3980 }
3981 END_TEST
3982
3983 /* Test that a parameter entity value ending with a carriage return
3984 * has it translated internally into a newline.
3985 */
START_TEST(test_param_entity_with_trailing_cr)3986 START_TEST(test_param_entity_with_trailing_cr) {
3987 #define PARAM_ENTITY_NAME "pe"
3988 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3989 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3990 "<doc/>";
3991 ExtTest test_data
3992 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3993 "%" PARAM_ENTITY_NAME ";\n",
3994 NULL, NULL};
3995
3996 XML_SetUserData(g_parser, &test_data);
3997 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3998 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3999 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4000 param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4001 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4002 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4003 == XML_STATUS_ERROR)
4004 xml_failure(g_parser);
4005 int entity_match_flag = get_param_entity_match_flag();
4006 if (entity_match_flag == ENTITY_MATCH_FAIL)
4007 fail("Parameter entity CR->NEWLINE conversion failed");
4008 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4009 fail("Parameter entity not parsed");
4010 }
4011 #undef PARAM_ENTITY_NAME
4012 #undef PARAM_ENTITY_CORE_VALUE
4013 END_TEST
4014
START_TEST(test_invalid_character_entity)4015 START_TEST(test_invalid_character_entity) {
4016 const char *text = "<!DOCTYPE doc [\n"
4017 " <!ENTITY entity '�'>\n"
4018 "]>\n"
4019 "<doc>&entity;</doc>";
4020
4021 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4022 "Out of range character reference not faulted");
4023 }
4024 END_TEST
4025
START_TEST(test_invalid_character_entity_2)4026 START_TEST(test_invalid_character_entity_2) {
4027 const char *text = "<!DOCTYPE doc [\n"
4028 " <!ENTITY entity '&#xg0;'>\n"
4029 "]>\n"
4030 "<doc>&entity;</doc>";
4031
4032 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4033 "Out of range character reference not faulted");
4034 }
4035 END_TEST
4036
START_TEST(test_invalid_character_entity_3)4037 START_TEST(test_invalid_character_entity_3) {
4038 const char text[] =
4039 /* <!DOCTYPE doc [\n */
4040 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4041 /* U+0E04 = KHO KHWAI
4042 * U+0E08 = CHO CHAN */
4043 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4044 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4045 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4046 /* ]>\n */
4047 "\0]\0>\0\n"
4048 /* <doc>&entity;</doc> */
4049 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4050
4051 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4052 != XML_STATUS_ERROR)
4053 fail("Invalid start of entity name not faulted");
4054 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4055 xml_failure(g_parser);
4056 }
4057 END_TEST
4058
START_TEST(test_invalid_character_entity_4)4059 START_TEST(test_invalid_character_entity_4) {
4060 const char *text = "<!DOCTYPE doc [\n"
4061 " <!ENTITY entity '�'>\n" /* = � */
4062 "]>\n"
4063 "<doc>&entity;</doc>";
4064
4065 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4066 "Out of range character reference not faulted");
4067 }
4068 END_TEST
4069
4070 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4071 START_TEST(test_pi_handled_in_default) {
4072 const char *text = "<?test processing instruction?>\n<doc/>";
4073 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4074 CharData storage;
4075
4076 CharData_Init(&storage);
4077 XML_SetDefaultHandler(g_parser, accumulate_characters);
4078 XML_SetUserData(g_parser, &storage);
4079 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4080 == XML_STATUS_ERROR)
4081 xml_failure(g_parser);
4082 CharData_CheckXMLChars(&storage, expected);
4083 }
4084 END_TEST
4085
4086 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4087 START_TEST(test_comment_handled_in_default) {
4088 const char *text = "<!-- This is a comment -->\n<doc/>";
4089 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4090 CharData storage;
4091
4092 CharData_Init(&storage);
4093 XML_SetDefaultHandler(g_parser, accumulate_characters);
4094 XML_SetUserData(g_parser, &storage);
4095 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4096 == XML_STATUS_ERROR)
4097 xml_failure(g_parser);
4098 CharData_CheckXMLChars(&storage, expected);
4099 }
4100 END_TEST
4101
4102 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4103 START_TEST(test_pi_yml) {
4104 const char *text = "<?yml something like data?><doc/>";
4105 const XML_Char *expected = XCS("yml: something like data\n");
4106 CharData storage;
4107
4108 CharData_Init(&storage);
4109 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4110 XML_SetUserData(g_parser, &storage);
4111 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4112 == XML_STATUS_ERROR)
4113 xml_failure(g_parser);
4114 CharData_CheckXMLChars(&storage, expected);
4115 }
4116 END_TEST
4117
START_TEST(test_pi_xnl)4118 START_TEST(test_pi_xnl) {
4119 const char *text = "<?xnl nothing like data?><doc/>";
4120 const XML_Char *expected = XCS("xnl: nothing like data\n");
4121 CharData storage;
4122
4123 CharData_Init(&storage);
4124 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4125 XML_SetUserData(g_parser, &storage);
4126 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4127 == XML_STATUS_ERROR)
4128 xml_failure(g_parser);
4129 CharData_CheckXMLChars(&storage, expected);
4130 }
4131 END_TEST
4132
START_TEST(test_pi_xmm)4133 START_TEST(test_pi_xmm) {
4134 const char *text = "<?xmm everything like data?><doc/>";
4135 const XML_Char *expected = XCS("xmm: everything like data\n");
4136 CharData storage;
4137
4138 CharData_Init(&storage);
4139 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4140 XML_SetUserData(g_parser, &storage);
4141 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4142 == XML_STATUS_ERROR)
4143 xml_failure(g_parser);
4144 CharData_CheckXMLChars(&storage, expected);
4145 }
4146 END_TEST
4147
START_TEST(test_utf16_pi)4148 START_TEST(test_utf16_pi) {
4149 const char text[] =
4150 /* <?{KHO KHWAI}{CHO CHAN}?>
4151 * where {KHO KHWAI} = U+0E04
4152 * and {CHO CHAN} = U+0E08
4153 */
4154 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4155 /* <q/> */
4156 "<\0q\0/\0>\0";
4157 #ifdef XML_UNICODE
4158 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4159 #else
4160 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4161 #endif
4162 CharData storage;
4163
4164 CharData_Init(&storage);
4165 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4166 XML_SetUserData(g_parser, &storage);
4167 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4168 == XML_STATUS_ERROR)
4169 xml_failure(g_parser);
4170 CharData_CheckXMLChars(&storage, expected);
4171 }
4172 END_TEST
4173
START_TEST(test_utf16_be_pi)4174 START_TEST(test_utf16_be_pi) {
4175 const char text[] =
4176 /* <?{KHO KHWAI}{CHO CHAN}?>
4177 * where {KHO KHWAI} = U+0E04
4178 * and {CHO CHAN} = U+0E08
4179 */
4180 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4181 /* <q/> */
4182 "\0<\0q\0/\0>";
4183 #ifdef XML_UNICODE
4184 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4185 #else
4186 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4187 #endif
4188 CharData storage;
4189
4190 CharData_Init(&storage);
4191 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4192 XML_SetUserData(g_parser, &storage);
4193 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4194 == XML_STATUS_ERROR)
4195 xml_failure(g_parser);
4196 CharData_CheckXMLChars(&storage, expected);
4197 }
4198 END_TEST
4199
4200 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4201 START_TEST(test_utf16_be_comment) {
4202 const char text[] =
4203 /* <!-- Comment A --> */
4204 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4205 /* <doc/> */
4206 "\0<\0d\0o\0c\0/\0>";
4207 const XML_Char *expected = XCS(" Comment A ");
4208 CharData storage;
4209
4210 CharData_Init(&storage);
4211 XML_SetCommentHandler(g_parser, accumulate_comment);
4212 XML_SetUserData(g_parser, &storage);
4213 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4214 == XML_STATUS_ERROR)
4215 xml_failure(g_parser);
4216 CharData_CheckXMLChars(&storage, expected);
4217 }
4218 END_TEST
4219
START_TEST(test_utf16_le_comment)4220 START_TEST(test_utf16_le_comment) {
4221 const char text[] =
4222 /* <!-- Comment B --> */
4223 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4224 /* <doc/> */
4225 "<\0d\0o\0c\0/\0>\0";
4226 const XML_Char *expected = XCS(" Comment B ");
4227 CharData storage;
4228
4229 CharData_Init(&storage);
4230 XML_SetCommentHandler(g_parser, accumulate_comment);
4231 XML_SetUserData(g_parser, &storage);
4232 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4233 == XML_STATUS_ERROR)
4234 xml_failure(g_parser);
4235 CharData_CheckXMLChars(&storage, expected);
4236 }
4237 END_TEST
4238
4239 /* Test that the unknown encoding handler with map entries that expect
4240 * conversion but no conversion function is faulted
4241 */
START_TEST(test_missing_encoding_conversion_fn)4242 START_TEST(test_missing_encoding_conversion_fn) {
4243 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4244 "<doc>\x81</doc>";
4245
4246 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4247 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4248 * character introducing a two-byte sequence. For this, it
4249 * requires a convert function. The above function call doesn't
4250 * pass one through, so when BadEncodingHandler actually gets
4251 * called it should supply an invalid encoding.
4252 */
4253 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4254 "Encoding with missing convert() not faulted");
4255 }
4256 END_TEST
4257
START_TEST(test_failing_encoding_conversion_fn)4258 START_TEST(test_failing_encoding_conversion_fn) {
4259 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4260 "<doc>\x81</doc>";
4261
4262 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4263 /* BadEncodingHandler sets up an encoding with every top-bit-set
4264 * character introducing a two-byte sequence. For this, it
4265 * requires a convert function. The above function call passes
4266 * one that insists all possible sequences are invalid anyway.
4267 */
4268 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4269 "Encoding with failing convert() not faulted");
4270 }
4271 END_TEST
4272
4273 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4274 START_TEST(test_unknown_encoding_success) {
4275 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4276 /* Equivalent to <eoc>Hello, world</eoc> */
4277 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4278
4279 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4280 run_character_check(text, XCS("Hello, world"));
4281 }
4282 END_TEST
4283
4284 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4285 START_TEST(test_unknown_encoding_bad_name) {
4286 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4287 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4288
4289 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4290 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4291 "Bad name start in unknown encoding not faulted");
4292 }
4293 END_TEST
4294
4295 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4296 START_TEST(test_unknown_encoding_bad_name_2) {
4297 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4298 "<d\xffoc>Hello, world</d\xffoc>";
4299
4300 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4301 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4302 "Bad name in unknown encoding not faulted");
4303 }
4304 END_TEST
4305
4306 /* Test element name that is long enough to fill the conversion buffer
4307 * in an unknown encoding, finishing with an encoded character.
4308 */
START_TEST(test_unknown_encoding_long_name_1)4309 START_TEST(test_unknown_encoding_long_name_1) {
4310 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4311 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4312 "Hi"
4313 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4314 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4315 CharData storage;
4316
4317 CharData_Init(&storage);
4318 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4319 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4320 XML_SetUserData(g_parser, &storage);
4321 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4322 == XML_STATUS_ERROR)
4323 xml_failure(g_parser);
4324 CharData_CheckXMLChars(&storage, expected);
4325 }
4326 END_TEST
4327
4328 /* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
4330 */
START_TEST(test_unknown_encoding_long_name_2)4331 START_TEST(test_unknown_encoding_long_name_2) {
4332 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4333 "<abcdefghabcdefghabcdefghijklmnop>"
4334 "Hi"
4335 "</abcdefghabcdefghabcdefghijklmnop>";
4336 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4337 CharData storage;
4338
4339 CharData_Init(&storage);
4340 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4341 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4342 XML_SetUserData(g_parser, &storage);
4343 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4344 == XML_STATUS_ERROR)
4345 xml_failure(g_parser);
4346 CharData_CheckXMLChars(&storage, expected);
4347 }
4348 END_TEST
4349
START_TEST(test_invalid_unknown_encoding)4350 START_TEST(test_invalid_unknown_encoding) {
4351 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4352 "<doc>Hello world</doc>";
4353
4354 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4355 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4356 "Invalid unknown encoding not faulted");
4357 }
4358 END_TEST
4359
START_TEST(test_unknown_ascii_encoding_ok)4360 START_TEST(test_unknown_ascii_encoding_ok) {
4361 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4362 "<doc>Hello, world</doc>";
4363
4364 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4365 run_character_check(text, XCS("Hello, world"));
4366 }
4367 END_TEST
4368
START_TEST(test_unknown_ascii_encoding_fail)4369 START_TEST(test_unknown_ascii_encoding_fail) {
4370 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4371 "<doc>Hello, \x80 world</doc>";
4372
4373 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4374 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4375 "Invalid character not faulted");
4376 }
4377 END_TEST
4378
START_TEST(test_unknown_encoding_invalid_length)4379 START_TEST(test_unknown_encoding_invalid_length) {
4380 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4381 "<doc>Hello, world</doc>";
4382
4383 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4384 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4385 "Invalid unknown encoding not faulted");
4386 }
4387 END_TEST
4388
START_TEST(test_unknown_encoding_invalid_topbit)4389 START_TEST(test_unknown_encoding_invalid_topbit) {
4390 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4391 "<doc>Hello, world</doc>";
4392
4393 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4394 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4395 "Invalid unknown encoding not faulted");
4396 }
4397 END_TEST
4398
START_TEST(test_unknown_encoding_invalid_surrogate)4399 START_TEST(test_unknown_encoding_invalid_surrogate) {
4400 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4401 "<doc>Hello, \x82 world</doc>";
4402
4403 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4404 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4405 "Invalid unknown encoding not faulted");
4406 }
4407 END_TEST
4408
START_TEST(test_unknown_encoding_invalid_high)4409 START_TEST(test_unknown_encoding_invalid_high) {
4410 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4411 "<doc>Hello, world</doc>";
4412
4413 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4414 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4415 "Invalid unknown encoding not faulted");
4416 }
4417 END_TEST
4418
START_TEST(test_unknown_encoding_invalid_attr_value)4419 START_TEST(test_unknown_encoding_invalid_attr_value) {
4420 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4421 "<doc attr='\xff\x30'/>";
4422
4423 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4424 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4425 "Invalid attribute valid not faulted");
4426 }
4427 END_TEST
4428
4429 /* Test an external entity parser set to use latin-1 detects UTF-16
4430 * BOMs correctly.
4431 */
4432 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4433 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4434 const char *text = "<!DOCTYPE doc [\n"
4435 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4436 "]>\n"
4437 "<doc>&en;</doc>";
4438 ExtTest2 test_data
4439 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4440 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4441 * 0x4c = L and 0x20 is a space
4442 */
4443 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4444 #ifdef XML_UNICODE
4445 const XML_Char *expected = XCS("\x00ff\x00feL ");
4446 #else
4447 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4448 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4449 #endif
4450 CharData storage;
4451
4452 CharData_Init(&storage);
4453 test_data.storage = &storage;
4454 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4455 XML_SetUserData(g_parser, &test_data);
4456 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4457 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4458 == XML_STATUS_ERROR)
4459 xml_failure(g_parser);
4460 CharData_CheckXMLChars(&storage, expected);
4461 }
4462 END_TEST
4463
START_TEST(test_ext_entity_latin1_utf16be_bom)4464 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4465 const char *text = "<!DOCTYPE doc [\n"
4466 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4467 "]>\n"
4468 "<doc>&en;</doc>";
4469 ExtTest2 test_data
4470 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4471 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4472 * 0x4c = L and 0x20 is a space
4473 */
4474 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4475 #ifdef XML_UNICODE
4476 const XML_Char *expected = XCS("\x00fe\x00ff L");
4477 #else
4478 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4479 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4480 #endif
4481 CharData storage;
4482
4483 CharData_Init(&storage);
4484 test_data.storage = &storage;
4485 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4486 XML_SetUserData(g_parser, &test_data);
4487 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4488 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4489 == XML_STATUS_ERROR)
4490 xml_failure(g_parser);
4491 CharData_CheckXMLChars(&storage, expected);
4492 }
4493 END_TEST
4494
4495 /* Parsing the full buffer rather than a byte at a time makes a
4496 * difference to the encoding scanning code, so repeat the above tests
4497 * without breaking them down by byte.
4498 */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4499 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4500 const char *text = "<!DOCTYPE doc [\n"
4501 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4502 "]>\n"
4503 "<doc>&en;</doc>";
4504 ExtTest2 test_data
4505 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4506 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4507 * 0x4c = L and 0x20 is a space
4508 */
4509 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4510 #ifdef XML_UNICODE
4511 const XML_Char *expected = XCS("\x00ff\x00feL ");
4512 #else
4513 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4514 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4515 #endif
4516 CharData storage;
4517
4518 CharData_Init(&storage);
4519 test_data.storage = &storage;
4520 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4521 XML_SetUserData(g_parser, &test_data);
4522 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4523 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4524 == XML_STATUS_ERROR)
4525 xml_failure(g_parser);
4526 CharData_CheckXMLChars(&storage, expected);
4527 }
4528 END_TEST
4529
START_TEST(test_ext_entity_latin1_utf16be_bom2)4530 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4531 const char *text = "<!DOCTYPE doc [\n"
4532 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4533 "]>\n"
4534 "<doc>&en;</doc>";
4535 ExtTest2 test_data
4536 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4537 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4538 * 0x4c = L and 0x20 is a space
4539 */
4540 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4541 #ifdef XML_UNICODE
4542 const XML_Char *expected = XCS("\x00fe\x00ff L");
4543 #else
4544 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4545 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4546 #endif
4547 CharData storage;
4548
4549 CharData_Init(&storage);
4550 test_data.storage = &storage;
4551 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4552 XML_SetUserData(g_parser, &test_data);
4553 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4554 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4555 == XML_STATUS_ERROR)
4556 xml_failure(g_parser);
4557 CharData_CheckXMLChars(&storage, expected);
4558 }
4559 END_TEST
4560
4561 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4562 START_TEST(test_ext_entity_utf16_be) {
4563 const char *text = "<!DOCTYPE doc [\n"
4564 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4565 "]>\n"
4566 "<doc>&en;</doc>";
4567 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4568 #ifdef XML_UNICODE
4569 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4570 #else
4571 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4572 "\xe6\x94\x80" /* U+6500 */
4573 "\xe2\xbc\x80" /* U+2F00 */
4574 "\xe3\xb8\x80"); /* U+3E00 */
4575 #endif
4576 CharData storage;
4577
4578 CharData_Init(&storage);
4579 test_data.storage = &storage;
4580 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4581 XML_SetUserData(g_parser, &test_data);
4582 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4583 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4584 == XML_STATUS_ERROR)
4585 xml_failure(g_parser);
4586 CharData_CheckXMLChars(&storage, expected);
4587 }
4588 END_TEST
4589
4590 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4591 START_TEST(test_ext_entity_utf16_le) {
4592 const char *text = "<!DOCTYPE doc [\n"
4593 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4594 "]>\n"
4595 "<doc>&en;</doc>";
4596 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4597 #ifdef XML_UNICODE
4598 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4599 #else
4600 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4601 "\xe6\x94\x80" /* U+6500 */
4602 "\xe2\xbc\x80" /* U+2F00 */
4603 "\xe3\xb8\x80"); /* U+3E00 */
4604 #endif
4605 CharData storage;
4606
4607 CharData_Init(&storage);
4608 test_data.storage = &storage;
4609 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4610 XML_SetUserData(g_parser, &test_data);
4611 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4612 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4613 == XML_STATUS_ERROR)
4614 xml_failure(g_parser);
4615 CharData_CheckXMLChars(&storage, expected);
4616 }
4617 END_TEST
4618
4619 /* Test little-endian UTF-16 given no explicit encoding.
4620 * The existing default encoding (UTF-8) is assumed to hold without a
4621 * BOM to contradict it, so the entity value will in fact provoke an
4622 * error because 0x00 is not a valid XML character. We parse the
4623 * whole buffer in one go rather than feeding it in byte by byte to
4624 * exercise different code paths in the initial scanning routines.
4625 */
START_TEST(test_ext_entity_utf16_unknown)4626 START_TEST(test_ext_entity_utf16_unknown) {
4627 const char *text = "<!DOCTYPE doc [\n"
4628 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4629 "]>\n"
4630 "<doc>&en;</doc>";
4631 ExtFaults2 test_data
4632 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4633 XML_ERROR_INVALID_TOKEN};
4634
4635 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4636 XML_SetUserData(g_parser, &test_data);
4637 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4638 "Invalid character should not have been accepted");
4639 }
4640 END_TEST
4641
4642 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4643 START_TEST(test_ext_entity_utf8_non_bom) {
4644 const char *text = "<!DOCTYPE doc [\n"
4645 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4646 "]>\n"
4647 "<doc>&en;</doc>";
4648 ExtTest2 test_data
4649 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4650 3, NULL, NULL};
4651 #ifdef XML_UNICODE
4652 const XML_Char *expected = XCS("\xfec0");
4653 #else
4654 const XML_Char *expected = XCS("\xef\xbb\x80");
4655 #endif
4656 CharData storage;
4657
4658 CharData_Init(&storage);
4659 test_data.storage = &storage;
4660 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4661 XML_SetUserData(g_parser, &test_data);
4662 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4663 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4664 == XML_STATUS_ERROR)
4665 xml_failure(g_parser);
4666 CharData_CheckXMLChars(&storage, expected);
4667 }
4668 END_TEST
4669
4670 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4671 START_TEST(test_utf8_in_cdata_section) {
4672 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4673 #ifdef XML_UNICODE
4674 const XML_Char *expected = XCS("one \x00e9 two");
4675 #else
4676 const XML_Char *expected = XCS("one \xc3\xa9 two");
4677 #endif
4678
4679 run_character_check(text, expected);
4680 }
4681 END_TEST
4682
/* Test that UTF-8 around a lone ']' in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)4684 START_TEST(test_utf8_in_cdata_section_2) {
4685 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4686 #ifdef XML_UNICODE
4687 const XML_Char *expected = XCS("\x00e9]\x00e9two");
4688 #else
4689 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4690 #endif
4691
4692 run_character_check(text, expected);
4693 }
4694 END_TEST
4695
START_TEST(test_utf8_in_start_tags)4696 START_TEST(test_utf8_in_start_tags) {
4697 struct test_case {
4698 bool goodName;
4699 bool goodNameStart;
4700 const char *tagName;
4701 };
4702
4703 // The idea with the tests below is this:
4704 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4705 // go to isNever and are hence not a concern.
4706 //
4707 // We start with a character that is a valid name character
4708 // (or even name-start character, see XML 1.0r4 spec) and then we flip
4709 // single bits at places where (1) the result leaves the UTF-8 encoding space
4710 // and (2) we stay in the same n-byte sequence family.
4711 //
4712 // The flipped bits are highlighted in angle brackets in comments,
4713 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4714 // the most significant bit to 1 to leave UTF-8 encoding space.
4715 struct test_case cases[] = {
4716 // 1-byte UTF-8: [0xxx xxxx]
4717 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
4718 {false, false, "\xBA"}, // [<1>011 1010]
4719 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
4720 {false, false, "\xB9"}, // [<1>011 1001]
4721
4722 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4723 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
4724 // Arabic small waw U+06E5
4725 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4726 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4727 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4728 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
4729 // combining char U+0301
4730 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4731 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4732 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4733
4734 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4735 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
4736 // Devanagari Letter A U+0905
4737 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4738 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4739 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4740 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4741 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4742 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
4743 // combining char U+0901
4744 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4745 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4746 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4747 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4748 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4749 };
4750 const bool atNameStart[] = {true, false};
4751
4752 size_t i = 0;
4753 char doc[1024];
4754 size_t failCount = 0;
4755
4756 // we need all the bytes to be parsed, but we don't want the errors that can
4757 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4758 if (g_reparseDeferralEnabledDefault) {
4759 return;
4760 }
4761
4762 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4763 size_t j = 0;
4764 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4765 const bool expectedSuccess
4766 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4767 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4768 cases[i].tagName);
4769 XML_Parser parser = XML_ParserCreate(NULL);
4770
4771 const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4772 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4773
4774 bool success = true;
4775 if ((status == XML_STATUS_OK) != expectedSuccess) {
4776 success = false;
4777 }
4778 if ((status == XML_STATUS_ERROR)
4779 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4780 success = false;
4781 }
4782
4783 if (! success) {
4784 fprintf(
4785 stderr,
4786 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4787 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
4788 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4789 failCount++;
4790 }
4791
4792 XML_ParserFree(parser);
4793 }
4794 }
4795
4796 if (failCount > 0) {
4797 fail("UTF-8 regression detected");
4798 }
4799 }
4800 END_TEST
4801
4802 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4803 START_TEST(test_trailing_spaces_in_elements) {
4804 const char *text = "<doc >Hi</doc >";
4805 const XML_Char *expected = XCS("doc/doc");
4806 CharData storage;
4807
4808 CharData_Init(&storage);
4809 XML_SetElementHandler(g_parser, record_element_start_handler,
4810 record_element_end_handler);
4811 XML_SetUserData(g_parser, &storage);
4812 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4813 == XML_STATUS_ERROR)
4814 xml_failure(g_parser);
4815 CharData_CheckXMLChars(&storage, expected);
4816 }
4817 END_TEST
4818
START_TEST(test_utf16_attribute)4819 START_TEST(test_utf16_attribute) {
4820 const char text[] =
4821 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4822 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4823 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4824 */
4825 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4826 const XML_Char *expected = XCS("a");
4827 CharData storage;
4828
4829 CharData_Init(&storage);
4830 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4831 XML_SetUserData(g_parser, &storage);
4832 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4833 == XML_STATUS_ERROR)
4834 xml_failure(g_parser);
4835 CharData_CheckXMLChars(&storage, expected);
4836 }
4837 END_TEST
4838
START_TEST(test_utf16_second_attr)4839 START_TEST(test_utf16_second_attr) {
4840 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4841 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4842 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4843 */
4844 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4845 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4846 const XML_Char *expected = XCS("1");
4847 CharData storage;
4848
4849 CharData_Init(&storage);
4850 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4851 XML_SetUserData(g_parser, &storage);
4852 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4853 == XML_STATUS_ERROR)
4854 xml_failure(g_parser);
4855 CharData_CheckXMLChars(&storage, expected);
4856 }
4857 END_TEST
4858
START_TEST(test_attr_after_solidus)4859 START_TEST(test_attr_after_solidus) {
4860 const char *text = "<doc attr1='a' / attr2='b'>";
4861
4862 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4863 }
4864 END_TEST
4865
START_TEST(test_utf16_pe)4866 START_TEST(test_utf16_pe) {
4867 /* <!DOCTYPE doc [
4868 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4869 * %{KHO KHWAI}{CHO CHAN};
4870 * ]>
4871 * <doc></doc>
4872 *
4873 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4874 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4875 */
4876 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4877 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4878 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4879 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4880 "\0%\x0e\x04\x0e\x08\0;\0\n"
4881 "\0]\0>\0\n"
4882 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4883 #ifdef XML_UNICODE
4884 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4885 #else
4886 const XML_Char *expected
4887 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4888 #endif
4889 CharData storage;
4890
4891 CharData_Init(&storage);
4892 XML_SetUserData(g_parser, &storage);
4893 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4894 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4895 == XML_STATUS_ERROR)
4896 xml_failure(g_parser);
4897 CharData_CheckXMLChars(&storage, expected);
4898 }
4899 END_TEST
4900
4901 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)4902 START_TEST(test_bad_attr_desc_keyword) {
4903 const char *text = "<!DOCTYPE doc [\n"
4904 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4905 "]>\n"
4906 "<doc />";
4907
4908 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4909 "Bad keyword !IMPLIED not faulted");
4910 }
4911 END_TEST
4912
/* Test that an invalid attribute description keyword consisting of
 * UTF-16 characters with their top bytes non-zero is correctly
 * faulted
 */
START_TEST(test_bad_attr_desc_keyword_utf16)4917 START_TEST(test_bad_attr_desc_keyword_utf16) {
4918 /* <!DOCTYPE d [
4919 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4920 * ]><d/>
4921 *
4922 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4923 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4924 */
4925 const char text[]
4926 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4927 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4928 "\0#\x0e\x04\x0e\x08\0>\0\n"
4929 "\0]\0>\0<\0d\0/\0>";
4930
4931 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4932 != XML_STATUS_ERROR)
4933 fail("Invalid UTF16 attribute keyword not faulted");
4934 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4935 xml_failure(g_parser);
4936 }
4937 END_TEST
4938
4939 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
4940 * using prefix-encoding (see above) to trigger specific code paths
4941 */
START_TEST(test_bad_doctype)4942 START_TEST(test_bad_doctype) {
4943 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4944 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4945
4946 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4947 expect_failure(text, XML_ERROR_SYNTAX,
4948 "Invalid bytes in DOCTYPE not faulted");
4949 }
4950 END_TEST
4951
START_TEST(test_bad_doctype_utf8)4952 START_TEST(test_bad_doctype_utf8) {
4953 const char *text = "<!DOCTYPE \xDB\x25"
4954 "doc><doc/>"; // [1101 1011] [<0>010 0101]
4955 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4956 "Invalid UTF-8 in DOCTYPE not faulted");
4957 }
4958 END_TEST
4959
START_TEST(test_bad_doctype_utf16)4960 START_TEST(test_bad_doctype_utf16) {
4961 const char text[] =
4962 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4963 *
4964 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4965 * (name character) but not a valid letter (name start character)
4966 */
4967 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4968 "\x06\xf2"
4969 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4970
4971 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4972 != XML_STATUS_ERROR)
4973 fail("Invalid bytes in DOCTYPE not faulted");
4974 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4975 xml_failure(g_parser);
4976 }
4977 END_TEST
4978
START_TEST(test_bad_doctype_plus)4979 START_TEST(test_bad_doctype_plus) {
4980 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4981 "<1+>&foo;</1+>";
4982
4983 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4984 "'+' in document name not faulted");
4985 }
4986 END_TEST
4987
START_TEST(test_bad_doctype_star)4988 START_TEST(test_bad_doctype_star) {
4989 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4990 "<1*>&foo;</1*>";
4991
4992 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4993 "'*' in document name not faulted");
4994 }
4995 END_TEST
4996
START_TEST(test_bad_doctype_query)4997 START_TEST(test_bad_doctype_query) {
4998 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
4999 "<1?>&foo;</1?>";
5000
5001 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5002 "'?' in document name not faulted");
5003 }
5004 END_TEST
5005
START_TEST(test_unknown_encoding_bad_ignore)5006 START_TEST(test_unknown_encoding_bad_ignore) {
5007 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5008 "<!DOCTYPE doc SYSTEM 'foo'>"
5009 "<doc><e>&entity;</e></doc>";
5010 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5011 "Invalid character not faulted", XCS("prefix-conv"),
5012 XML_ERROR_INVALID_TOKEN};
5013
5014 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5015 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5016 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5017 XML_SetUserData(g_parser, &fault);
5018 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5019 "Bad IGNORE section with unknown encoding not failed");
5020 }
5021 END_TEST
5022
START_TEST(test_entity_in_utf16_be_attr)5023 START_TEST(test_entity_in_utf16_be_attr) {
5024 const char text[] =
5025 /* <e a='ä ä'></e> */
5026 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5027 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5028 #ifdef XML_UNICODE
5029 const XML_Char *expected = XCS("\x00e4 \x00e4");
5030 #else
5031 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5032 #endif
5033 CharData storage;
5034
5035 CharData_Init(&storage);
5036 XML_SetUserData(g_parser, &storage);
5037 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5038 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5039 == XML_STATUS_ERROR)
5040 xml_failure(g_parser);
5041 CharData_CheckXMLChars(&storage, expected);
5042 }
5043 END_TEST
5044
START_TEST(test_entity_in_utf16_le_attr)5045 START_TEST(test_entity_in_utf16_le_attr) {
5046 const char text[] =
5047 /* <e a='ä ä'></e> */
5048 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5049 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5050 #ifdef XML_UNICODE
5051 const XML_Char *expected = XCS("\x00e4 \x00e4");
5052 #else
5053 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5054 #endif
5055 CharData storage;
5056
5057 CharData_Init(&storage);
5058 XML_SetUserData(g_parser, &storage);
5059 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5060 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5061 == XML_STATUS_ERROR)
5062 xml_failure(g_parser);
5063 CharData_CheckXMLChars(&storage, expected);
5064 }
5065 END_TEST
5066
START_TEST(test_entity_public_utf16_be)5067 START_TEST(test_entity_public_utf16_be) {
5068 const char text[] =
5069 /* <!DOCTYPE d [ */
5070 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5071 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5072 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5073 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5074 /* %e; */
5075 "\0%\0e\0;\0\n"
5076 /* ]> */
5077 "\0]\0>\0\n"
5078 /* <d>&j;</d> */
5079 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5080 ExtTest2 test_data
5081 = {/* <!ENTITY j 'baz'> */
5082 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5083 const XML_Char *expected = XCS("baz");
5084 CharData storage;
5085
5086 CharData_Init(&storage);
5087 test_data.storage = &storage;
5088 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5089 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5090 XML_SetUserData(g_parser, &test_data);
5091 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5092 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5093 == XML_STATUS_ERROR)
5094 xml_failure(g_parser);
5095 CharData_CheckXMLChars(&storage, expected);
5096 }
5097 END_TEST
5098
START_TEST(test_entity_public_utf16_le)5099 START_TEST(test_entity_public_utf16_le) {
5100 const char text[] =
5101 /* <!DOCTYPE d [ */
5102 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5103 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5104 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5105 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5106 /* %e; */
5107 "%\0e\0;\0\n\0"
5108 /* ]> */
5109 "]\0>\0\n\0"
5110 /* <d>&j;</d> */
5111 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5112 ExtTest2 test_data
5113 = {/* <!ENTITY j 'baz'> */
5114 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5115 const XML_Char *expected = XCS("baz");
5116 CharData storage;
5117
5118 CharData_Init(&storage);
5119 test_data.storage = &storage;
5120 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5121 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5122 XML_SetUserData(g_parser, &test_data);
5123 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5124 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5125 == XML_STATUS_ERROR)
5126 xml_failure(g_parser);
5127 CharData_CheckXMLChars(&storage, expected);
5128 }
5129 END_TEST
5130
5131 /* Test that a doctype with neither an internal nor external subset is
5132 * faulted
5133 */
START_TEST(test_short_doctype)5134 START_TEST(test_short_doctype) {
5135 const char *text = "<!DOCTYPE doc></doc>";
5136 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5137 "DOCTYPE without subset not rejected");
5138 }
5139 END_TEST
5140
START_TEST(test_short_doctype_2)5141 START_TEST(test_short_doctype_2) {
5142 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5143 expect_failure(text, XML_ERROR_SYNTAX,
5144 "DOCTYPE without Public ID not rejected");
5145 }
5146 END_TEST
5147
START_TEST(test_short_doctype_3)5148 START_TEST(test_short_doctype_3) {
5149 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5150 expect_failure(text, XML_ERROR_SYNTAX,
5151 "DOCTYPE without System ID not rejected");
5152 }
5153 END_TEST
5154
START_TEST(test_long_doctype)5155 START_TEST(test_long_doctype) {
5156 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5157 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5158 }
5159 END_TEST
5160
START_TEST(test_bad_entity)5161 START_TEST(test_bad_entity) {
5162 const char *text = "<!DOCTYPE doc [\n"
5163 " <!ENTITY foo PUBLIC>\n"
5164 "]>\n"
5165 "<doc/>";
5166 expect_failure(text, XML_ERROR_SYNTAX,
5167 "ENTITY without Public ID is not rejected");
5168 }
5169 END_TEST
5170
5171 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5172 START_TEST(test_bad_entity_2) {
5173 const char *text = "<!DOCTYPE doc [\n"
5174 " <!ENTITY % foo bar>\n"
5175 "]>\n"
5176 "<doc/>";
5177 expect_failure(text, XML_ERROR_SYNTAX,
5178 "ENTITY without Public ID is not rejected");
5179 }
5180 END_TEST
5181
START_TEST(test_bad_entity_3)5182 START_TEST(test_bad_entity_3) {
5183 const char *text = "<!DOCTYPE doc [\n"
5184 " <!ENTITY % foo PUBLIC>\n"
5185 "]>\n"
5186 "<doc/>";
5187 expect_failure(text, XML_ERROR_SYNTAX,
5188 "Parameter ENTITY without Public ID is not rejected");
5189 }
5190 END_TEST
5191
START_TEST(test_bad_entity_4)5192 START_TEST(test_bad_entity_4) {
5193 const char *text = "<!DOCTYPE doc [\n"
5194 " <!ENTITY % foo SYSTEM>\n"
5195 "]>\n"
5196 "<doc/>";
5197 expect_failure(text, XML_ERROR_SYNTAX,
5198 "Parameter ENTITY without Public ID is not rejected");
5199 }
5200 END_TEST
5201
START_TEST(test_bad_notation)5202 START_TEST(test_bad_notation) {
5203 const char *text = "<!DOCTYPE doc [\n"
5204 " <!NOTATION n SYSTEM>\n"
5205 "]>\n"
5206 "<doc/>";
5207 expect_failure(text, XML_ERROR_SYNTAX,
5208 "Notation without System ID is not rejected");
5209 }
5210 END_TEST
5211
5212 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5213 START_TEST(test_default_doctype_handler) {
5214 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5215 " <!ENTITY foo 'bar'>\n"
5216 "]>\n"
5217 "<doc>&foo;</doc>";
5218 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5219 {XCS("'test.dtd'"), 10, XML_FALSE},
5220 {NULL, 0, XML_FALSE}};
5221 int i;
5222
5223 XML_SetUserData(g_parser, &test_data);
5224 XML_SetDefaultHandler(g_parser, checking_default_handler);
5225 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5226 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5227 == XML_STATUS_ERROR)
5228 xml_failure(g_parser);
5229 for (i = 0; test_data[i].expected != NULL; i++)
5230 if (! test_data[i].seen)
5231 fail("Default handler not run for public !DOCTYPE");
5232 }
5233 END_TEST
5234
START_TEST(test_empty_element_abort)5235 START_TEST(test_empty_element_abort) {
5236 const char *text = "<abort/>";
5237
5238 XML_SetStartElementHandler(g_parser, start_element_suspender);
5239 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5240 != XML_STATUS_ERROR)
5241 fail("Expected to error on abort");
5242 }
5243 END_TEST
5244
5245 /* Regression test for GH issue #612: unfinished m_declAttributeType
5246 * allocation in ->m_tempPool can corrupt following allocation.
5247 */
START_TEST(test_pool_integrity_with_unfinished_attr)5248 START_TEST(test_pool_integrity_with_unfinished_attr) {
5249 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5250 "<!DOCTYPE foo [\n"
5251 "<!ELEMENT foo ANY>\n"
5252 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5253 "%entp;\n"
5254 "]>\n"
5255 "<a></a>\n";
5256 const XML_Char *expected = XCS("COMMENT");
5257 CharData storage;
5258
5259 CharData_Init(&storage);
5260 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5261 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5262 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5263 XML_SetCommentHandler(g_parser, accumulate_comment);
5264 XML_SetUserData(g_parser, &storage);
5265 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5266 == XML_STATUS_ERROR)
5267 xml_failure(g_parser);
5268 CharData_CheckXMLChars(&storage, expected);
5269 }
5270 END_TEST
5271
START_TEST(test_nested_entity_suspend)5272 START_TEST(test_nested_entity_suspend) {
5273 const char *const text = "<!DOCTYPE a [\n"
5274 " <!ENTITY e1 '<!--e1-->'>\n"
5275 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5276 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5277 "]>\n"
5278 "<a><!--start-->&e3;<!--end--></a>";
5279 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5280 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5281 CharData storage;
5282 CharData_Init(&storage);
5283 XML_Parser parser = XML_ParserCreate(NULL);
5284 ParserPlusStorage parserPlusStorage = {parser, &storage};
5285
5286 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5287 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5288 XML_SetUserData(parser, &parserPlusStorage);
5289
5290 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5291 while (status == XML_STATUS_SUSPENDED) {
5292 status = XML_ResumeParser(parser);
5293 }
5294 if (status != XML_STATUS_OK)
5295 xml_failure(parser);
5296
5297 CharData_CheckXMLChars(&storage, expected);
5298 XML_ParserFree(parser);
5299 }
5300 END_TEST
5301
5302 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5303 START_TEST(test_big_tokens_scale_linearly) {
5304 const struct {
5305 const char *pre;
5306 const char *post;
5307 } text[] = {
5308 {"<a>", "</a>"}, // assumed good, used as baseline
5309 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5310 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
5311 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
5312 {"<e><", "/></e>"}, // big elem name, used to be O(N²)
5313 };
5314 const int num_cases = sizeof(text) / sizeof(text[0]);
5315 char aaaaaa[4096];
5316 const int fillsize = (int)sizeof(aaaaaa);
5317 const int fillcount = 100;
5318 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5319 const unsigned max_factor = 4;
5320 const unsigned max_scanned = max_factor * approx_bytes;
5321
5322 memset(aaaaaa, 'a', fillsize);
5323
5324 if (! g_reparseDeferralEnabledDefault) {
5325 return; // heuristic is disabled; we would get O(n^2) and fail.
5326 }
5327
5328 for (int i = 0; i < num_cases; ++i) {
5329 XML_Parser parser = XML_ParserCreate(NULL);
5330 assert_true(parser != NULL);
5331 enum XML_Status status;
5332 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5333
5334 // parse the start text
5335 g_bytesScanned = 0;
5336 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5337 (int)strlen(text[i].pre), XML_FALSE);
5338 if (status != XML_STATUS_OK) {
5339 xml_failure(parser);
5340 }
5341
5342 // parse lots of 'a', failing the test early if it takes too long
5343 unsigned past_max_count = 0;
5344 for (int f = 0; f < fillcount; ++f) {
5345 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5346 if (status != XML_STATUS_OK) {
5347 xml_failure(parser);
5348 }
5349 if (g_bytesScanned > max_scanned) {
5350 // We're not done, and have already passed the limit -- the test will
5351 // definitely fail. This block allows us to save time by failing early.
5352 const unsigned pushed
5353 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5354 fprintf(
5355 stderr,
5356 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5357 f + 1, fillcount, pushed, g_bytesScanned,
5358 g_bytesScanned / (double)pushed, max_scanned, max_factor);
5359 past_max_count++;
5360 // We are failing, but allow a few log prints first. If we don't reach
5361 // a count of five, the test will fail after the loop instead.
5362 assert_true(past_max_count < 5);
5363 }
5364 }
5365
5366 // parse the end text
5367 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5368 (int)strlen(text[i].post), XML_TRUE);
5369 if (status != XML_STATUS_OK) {
5370 xml_failure(parser);
5371 }
5372
5373 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5374 if (g_bytesScanned > max_scanned) {
5375 fprintf(
5376 stderr,
5377 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5378 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5379 max_factor);
5380 fail("scanned too many bytes");
5381 }
5382
5383 XML_ParserFree(parser);
5384 }
5385 }
5386 END_TEST
5387
START_TEST(test_set_reparse_deferral)5388 START_TEST(test_set_reparse_deferral) {
5389 const char *const pre = "<d>";
5390 const char *const start = "<x attr='";
5391 const char *const end = "'></x>";
5392 char eeeeee[100];
5393 const int fillsize = (int)sizeof(eeeeee);
5394 memset(eeeeee, 'e', fillsize);
5395
5396 for (int enabled = 0; enabled <= 1; enabled += 1) {
5397 set_subtest("deferral=%d", enabled);
5398
5399 XML_Parser parser = XML_ParserCreate(NULL);
5400 assert_true(parser != NULL);
5401 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5402 // pre-grow the buffer to avoid reparsing due to almost-fullness
5403 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5404
5405 CharData storage;
5406 CharData_Init(&storage);
5407 XML_SetUserData(parser, &storage);
5408 XML_SetStartElementHandler(parser, start_element_event_handler);
5409
5410 enum XML_Status status;
5411 // parse the start text
5412 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5413 if (status != XML_STATUS_OK) {
5414 xml_failure(parser);
5415 }
5416 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5417
5418 // ..and the start of the token
5419 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5420 if (status != XML_STATUS_OK) {
5421 xml_failure(parser);
5422 }
5423 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5424
5425 // try to parse lots of 'e', but the token isn't finished
5426 for (int c = 0; c < 100; ++c) {
5427 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5428 if (status != XML_STATUS_OK) {
5429 xml_failure(parser);
5430 }
5431 }
5432 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5433
5434 // end the <x> token.
5435 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5436 if (status != XML_STATUS_OK) {
5437 xml_failure(parser);
5438 }
5439
5440 if (enabled) {
5441 // In general, we may need to push more data to trigger a reparse attempt,
5442 // but in this test, the data is constructed to always require it.
5443 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5444 // 2x the token length should suffice; the +1 covers the start and end.
5445 for (int c = 0; c < 101; ++c) {
5446 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5447 if (status != XML_STATUS_OK) {
5448 xml_failure(parser);
5449 }
5450 }
5451 }
5452 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5453
5454 XML_ParserFree(parser);
5455 }
5456 }
5457 END_TEST
5458
/* User data for element_decl_counter. */
struct element_decl_data {
  XML_Parser parser; /* parser passed to XML_FreeContentModel */
  int count;         /* incremented once per element_decl_counter call */
};
5463
5464 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5465 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5466 UNUSED_P(name);
5467 struct element_decl_data *testdata = (struct element_decl_data *)userData;
5468 testdata->count += 1;
5469 XML_FreeContentModel(testdata->parser, model);
5470 }
5471
5472 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5473 external_inherited_parser(XML_Parser p, const XML_Char *context,
5474 const XML_Char *base, const XML_Char *systemId,
5475 const XML_Char *publicId) {
5476 UNUSED_P(base);
5477 UNUSED_P(systemId);
5478 UNUSED_P(publicId);
5479 const char *const pre = "<!ELEMENT document ANY>\n";
5480 const char *const start = "<!ELEMENT ";
5481 const char *const end = " ANY>\n";
5482 const char *const post = "<!ELEMENT xyz ANY>\n";
5483 const int enabled = *(int *)XML_GetUserData(p);
5484 char eeeeee[100];
5485 char spaces[100];
5486 const int fillsize = (int)sizeof(eeeeee);
5487 assert_true(fillsize == (int)sizeof(spaces));
5488 memset(eeeeee, 'e', fillsize);
5489 memset(spaces, ' ', fillsize);
5490
5491 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5492 assert_true(parser != NULL);
5493 // pre-grow the buffer to avoid reparsing due to almost-fullness
5494 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5495
5496 struct element_decl_data testdata;
5497 testdata.parser = parser;
5498 testdata.count = 0;
5499 XML_SetUserData(parser, &testdata);
5500 XML_SetElementDeclHandler(parser, element_decl_counter);
5501
5502 enum XML_Status status;
5503 // parse the initial text
5504 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5505 if (status != XML_STATUS_OK) {
5506 xml_failure(parser);
5507 }
5508 assert_true(testdata.count == 1); // first element should be done
5509
5510 // ..and the start of the big token
5511 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5512 if (status != XML_STATUS_OK) {
5513 xml_failure(parser);
5514 }
5515 assert_true(testdata.count == 1); // still just the first one
5516
5517 // try to parse lots of 'e', but the token isn't finished
5518 for (int c = 0; c < 100; ++c) {
5519 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5520 if (status != XML_STATUS_OK) {
5521 xml_failure(parser);
5522 }
5523 }
5524 assert_true(testdata.count == 1); // *still* just the first one
5525
5526 // end the big token.
5527 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5528 if (status != XML_STATUS_OK) {
5529 xml_failure(parser);
5530 }
5531
5532 if (enabled) {
5533 // In general, we may need to push more data to trigger a reparse attempt,
5534 // but in this test, the data is constructed to always require it.
5535 assert_true(testdata.count == 1); // or the test is incorrect
5536 // 2x the token length should suffice; the +1 covers the start and end.
5537 for (int c = 0; c < 101; ++c) {
5538 status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5539 if (status != XML_STATUS_OK) {
5540 xml_failure(parser);
5541 }
5542 }
5543 }
5544 assert_true(testdata.count == 2); // the big token should be done
5545
5546 // parse the final text
5547 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5548 if (status != XML_STATUS_OK) {
5549 xml_failure(parser);
5550 }
5551 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5552
5553 XML_ParserFree(parser);
5554 return XML_STATUS_OK;
5555 }
5556
START_TEST(test_reparse_deferral_is_inherited)5557 START_TEST(test_reparse_deferral_is_inherited) {
5558 const char *const text
5559 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5560 for (int enabled = 0; enabled <= 1; ++enabled) {
5561 set_subtest("deferral=%d", enabled);
5562
5563 XML_Parser parser = XML_ParserCreate(NULL);
5564 assert_true(parser != NULL);
5565 XML_SetUserData(parser, (void *)&enabled);
5566 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5567 // this handler creates a sub-parser and checks that its deferral behavior
5568 // is what we expected, based on the value of `enabled` (in userdata).
5569 XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5570 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5571 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5572 xml_failure(parser);
5573
5574 XML_ParserFree(parser);
5575 }
5576 }
5577 END_TEST
5578
START_TEST(test_set_reparse_deferral_on_null_parser)5579 START_TEST(test_set_reparse_deferral_on_null_parser) {
5580 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5581 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5582 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5583 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5584 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5585 == XML_FALSE);
5586 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5587 == XML_FALSE);
5588 }
5589 END_TEST
5590
START_TEST(test_set_reparse_deferral_on_the_fly)5591 START_TEST(test_set_reparse_deferral_on_the_fly) {
5592 const char *const pre = "<d><x attr='";
5593 const char *const end = "'></x>";
5594 char iiiiii[100];
5595 const int fillsize = (int)sizeof(iiiiii);
5596 memset(iiiiii, 'i', fillsize);
5597
5598 XML_Parser parser = XML_ParserCreate(NULL);
5599 assert_true(parser != NULL);
5600 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5601
5602 CharData storage;
5603 CharData_Init(&storage);
5604 XML_SetUserData(parser, &storage);
5605 XML_SetStartElementHandler(parser, start_element_event_handler);
5606
5607 enum XML_Status status;
5608 // parse the start text
5609 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5610 if (status != XML_STATUS_OK) {
5611 xml_failure(parser);
5612 }
5613 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5614
5615 // try to parse some 'i', but the token isn't finished
5616 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5617 if (status != XML_STATUS_OK) {
5618 xml_failure(parser);
5619 }
5620 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5621
5622 // end the <x> token.
5623 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5624 if (status != XML_STATUS_OK) {
5625 xml_failure(parser);
5626 }
5627 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5628
5629 // now change the heuristic setting and add *no* data
5630 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5631 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5632 status = XML_Parse(parser, "", 0, XML_FALSE);
5633 if (status != XML_STATUS_OK) {
5634 xml_failure(parser);
5635 }
5636 CharData_CheckXMLChars(&storage, XCS("dx"));
5637
5638 XML_ParserFree(parser);
5639 }
5640 END_TEST
5641
START_TEST(test_set_bad_reparse_option)5642 START_TEST(test_set_bad_reparse_option) {
5643 XML_Parser parser = XML_ParserCreate(NULL);
5644 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5645 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5646 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5647 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5648 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5649 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5650 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5651 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5652 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5653 XML_ParserFree(parser);
5654 }
5655 END_TEST
5656
/* Sum of all sizes requested through the counting allocators below.  Tests
   set it back to 0 before the section they want to measure. */
static size_t g_totalAlloc = 0;
/* Largest single size requested through the counting allocators since the
   counter was last set back to 0. */
static size_t g_biggestAlloc = 0;

/* realloc() wrapper for use in an XML_Memory_Handling_Suite: records the
   requested size in g_totalAlloc and g_biggestAlloc, then delegates to the
   C library.  The size is recorded whether or not the allocation succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  g_totalAlloc += size;
  g_biggestAlloc = (g_biggestAlloc < size) ? size : g_biggestAlloc;
  return realloc(ptr, size);
}

/* malloc() counterpart of counting_realloc(): a fresh allocation is simply a
   counted realloc of NULL. */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
5673
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5674 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5675 if (g_chunkSize != 0) {
5676 // this test does not use SINGLE_BYTES, because it depends on very precise
5677 // buffer fills.
5678 return;
5679 }
5680 if (! g_reparseDeferralEnabledDefault) {
5681 return; // this test is irrelevant when the deferral heuristic is disabled.
5682 }
5683
5684 const int document_length = 65536;
5685 char *const document = (char *)malloc(document_length);
5686
5687 const XML_Memory_Handling_Suite memfuncs = {
5688 counting_malloc,
5689 counting_realloc,
5690 free,
5691 };
5692
5693 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5694 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5695 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5696
5697 for (const int *leading = leading_list; *leading >= 0; leading++) {
5698 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5699 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5700 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5701 *fillsize);
5702 // start by checking that the test looks reasonably valid
5703 assert_true(*leading + *bigtoken <= document_length);
5704
5705 // put 'x' everywhere; some will be overwritten by elements.
5706 memset(document, 'x', document_length);
5707 // maybe add an initial tag
5708 if (*leading) {
5709 assert_true(*leading >= 3); // or the test case is invalid
5710 memcpy(document, "<a>", 3);
5711 }
5712 // add the large token
5713 document[*leading + 0] = '<';
5714 document[*leading + 1] = 'b';
5715 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5716 document[*leading + *bigtoken - 1] = '>';
5717
5718 // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5719 const int expected_elem_total = 1 + (*leading ? 1 : 0);
5720
5721 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5722 assert_true(parser != NULL);
5723
5724 CharData storage;
5725 CharData_Init(&storage);
5726 XML_SetUserData(parser, &storage);
5727 XML_SetStartElementHandler(parser, start_element_event_handler);
5728
5729 g_biggestAlloc = 0;
5730 g_totalAlloc = 0;
5731 int offset = 0;
5732 // fill data until the big token is covered (but not necessarily parsed)
5733 while (offset < *leading + *bigtoken) {
5734 assert_true(offset + *fillsize <= document_length);
5735 const enum XML_Status status
5736 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5737 if (status != XML_STATUS_OK) {
5738 xml_failure(parser);
5739 }
5740 offset += *fillsize;
5741 }
5742 // Now, check that we've had a buffer allocation that could fit the
5743 // context bytes and our big token. In order to detect a special case,
5744 // we need to know how many bytes of our big token were included in the
5745 // first push that contained _any_ bytes of the big token:
5746 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5747 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5748 // Special case: we aren't saving any context, and the whole big token
5749 // was covered by a single fill, so Expat may have parsed directly
5750 // from our input pointer, without allocating an internal buffer.
5751 } else if (*leading < XML_CONTEXT_BYTES) {
5752 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5753 } else {
5754 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5755 }
5756 // fill data until the big token is actually parsed
5757 while (storage.count < expected_elem_total) {
5758 const size_t alloc_before = g_totalAlloc;
5759 assert_true(offset + *fillsize <= document_length);
5760 const enum XML_Status status
5761 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5762 if (status != XML_STATUS_OK) {
5763 xml_failure(parser);
5764 }
5765 offset += *fillsize;
5766 // since all the bytes of the big token are already in the buffer,
5767 // the bufsize ceiling should make us finish its parsing without any
5768 // further buffer allocations. We assume that there will be no other
5769 // large allocations in this test.
5770 assert_true(g_totalAlloc - alloc_before < 4096);
5771 }
5772 // test-the-test: was our alloc even called?
5773 assert_true(g_totalAlloc > 0);
5774 // test-the-test: there shouldn't be any extra start elements
5775 assert_true(storage.count == expected_elem_total);
5776
5777 XML_ParserFree(parser);
5778 }
5779 }
5780 }
5781 free(document);
5782 }
5783 END_TEST
5784
START_TEST(test_varying_buffer_fills)5785 START_TEST(test_varying_buffer_fills) {
5786 const int KiB = 1024;
5787 const int MiB = 1024 * KiB;
5788 const int document_length = 16 * MiB;
5789 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5790
5791 if (g_chunkSize != 0) {
5792 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5793 }
5794
5795 char *const document = (char *)malloc(document_length);
5796 assert_true(document != NULL);
5797 memset(document, 'x', document_length);
5798 document[0] = '<';
5799 document[1] = 't';
5800 memset(&document[2], ' ', big - 2); // a very spacy token
5801 document[big - 1] = '>';
5802
5803 // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5804 // When reparse deferral is enabled, the final (negated) value is the expected
5805 // maximum number of bytes scanned in parse attempts.
5806 const int testcases[][30] = {
5807 {8 * MiB, -8 * MiB},
5808 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5809 // zero-size fills shouldn't trigger the bypass
5810 {4 * MiB, 0, 4 * MiB, -12 * MiB},
5811 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5812 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5813 // try to hit the buffer ceiling only once (at the end)
5814 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5815 // try to hit the same buffer ceiling multiple times
5816 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5817
5818 // try to hit every ceiling, by always landing 1K shy of the buffer size
5819 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5820 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5821
5822 // try to avoid every ceiling, by always landing 1B past the buffer size
5823 // the normal 2x heuristic threshold still forces parse attempts.
5824 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
5825 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
5826 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
5827 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
5828 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5829 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5830 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5831 -(10 * MiB + 682 * KiB + 7)},
5832 // try to avoid every ceiling again, except on our last fill.
5833 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
5834 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
5835 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
5836 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
5837 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5838 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5839 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5840 -(10 * MiB + 682 * KiB + 6)},
5841
5842 // try to hit ceilings on the way multiple times
5843 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5844 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5845 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
5846 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
5847 // we'll make a parse attempt at every parse call
5848 -(45 * MiB + 12)},
5849 };
5850 const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5851 for (int test_i = 0; test_i < testcount; test_i++) {
5852 const int *fillsize = testcases[test_i];
5853 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5854 fillsize[2], fillsize[3]);
5855 XML_Parser parser = XML_ParserCreate(NULL);
5856 assert_true(parser != NULL);
5857
5858 CharData storage;
5859 CharData_Init(&storage);
5860 XML_SetUserData(parser, &storage);
5861 XML_SetStartElementHandler(parser, start_element_event_handler);
5862
5863 g_bytesScanned = 0;
5864 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5865 int offset = 0;
5866 while (*fillsize >= 0) {
5867 assert_true(offset + *fillsize <= document_length); // or test is invalid
5868 const enum XML_Status status
5869 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5870 if (status != XML_STATUS_OK) {
5871 xml_failure(parser);
5872 }
5873 offset += *fillsize;
5874 fillsize++;
5875 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5876 worstcase_bytes += offset; // we might've tried to parse all pending bytes
5877 }
5878 assert_true(storage.count == 1); // the big token should've been parsed
5879 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
5880 if (g_reparseDeferralEnabledDefault) {
5881 // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5882 const unsigned max_bytes_scanned = -*fillsize;
5883 if (g_bytesScanned > max_bytes_scanned) {
5884 fprintf(stderr,
5885 "bytes scanned in parse attempts: actual=%u limit=%u \n",
5886 g_bytesScanned, max_bytes_scanned);
5887 fail("too many bytes scanned in parse attempts");
5888 }
5889 }
5890 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
5891
5892 XML_ParserFree(parser);
5893 }
5894 free(document);
5895 }
5896 END_TEST
5897
/* Builds the "basic tests" test case and attaches it to suite `s`.
 *
 * The test case is created, added to the suite, given basic_setup /
 * basic_teardown as its checked fixture, and then populated with the tests
 * listed below, in this order.
 *
 * Three registration forms are used:
 *   - tcase_add_test:                registers the test as is;
 *   - tcase_add_test__ifdef_xml_dtd: NOTE(review): presumably registers only
 *                                    in builds with XML_DTD -- defined outside
 *                                    this chunk, confirm there;
 *   - tcase_add_test__if_xml_ge:     NOTE(review): presumably registers only
 *                                    in builds with XML_GE enabled -- defined
 *                                    outside this chunk, confirm there.
 *
 * A test function that is not listed here never runs, so every new test in
 * this file needs a matching line in this function.
 */
void
make_basic_test_case(Suite *s) {
  TCase *tc_basic = tcase_create("basic tests");

  suite_add_tcase(s, tc_basic);
  tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);

  tcase_add_test(tc_basic, test_nul_byte);
  tcase_add_test(tc_basic, test_u0000_char);
  tcase_add_test(tc_basic, test_siphash_self);
  tcase_add_test(tc_basic, test_siphash_spec);
  tcase_add_test(tc_basic, test_bom_utf8);
  tcase_add_test(tc_basic, test_bom_utf16_be);
  tcase_add_test(tc_basic, test_bom_utf16_le);
  tcase_add_test(tc_basic, test_nobom_utf16_le);
  tcase_add_test(tc_basic, test_hash_collision);
  tcase_add_test(tc_basic, test_illegal_utf8);
  tcase_add_test(tc_basic, test_utf8_auto_align);
  tcase_add_test(tc_basic, test_utf16);
  tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
  tcase_add_test(tc_basic, test_not_utf16);
  tcase_add_test(tc_basic, test_bad_encoding);
  tcase_add_test(tc_basic, test_latin1_umlauts);
  tcase_add_test(tc_basic, test_long_utf8_character);
  tcase_add_test(tc_basic, test_long_latin1_attribute);
  tcase_add_test(tc_basic, test_long_ascii_attribute);
  /* Regression test for SF bug #491986. */
  tcase_add_test(tc_basic, test_danish_latin1);
  /* Regression test for SF bug #514281. */
  tcase_add_test(tc_basic, test_french_charref_hexidecimal);
  tcase_add_test(tc_basic, test_french_charref_decimal);
  tcase_add_test(tc_basic, test_french_latin1);
  tcase_add_test(tc_basic, test_french_utf8);
  tcase_add_test(tc_basic, test_utf8_false_rejection);
  tcase_add_test(tc_basic, test_line_number_after_parse);
  tcase_add_test(tc_basic, test_column_number_after_parse);
  tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
  tcase_add_test(tc_basic, test_line_number_after_error);
  tcase_add_test(tc_basic, test_column_number_after_error);
  tcase_add_test(tc_basic, test_really_long_lines);
  tcase_add_test(tc_basic, test_really_long_encoded_lines);
  tcase_add_test(tc_basic, test_end_element_events);
  tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
  tcase_add_test(tc_basic, test_attr_whitespace_normalization);
  tcase_add_test(tc_basic, test_xmldecl_misplaced);
  tcase_add_test(tc_basic, test_xmldecl_invalid);
  tcase_add_test(tc_basic, test_xmldecl_missing_attr);
  tcase_add_test(tc_basic, test_xmldecl_missing_value);
  tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
  tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
  tcase_add_test(tc_basic,
                 test_wfc_undeclared_entity_with_external_subset_standalone);
  tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
  tcase_add_test(tc_basic, test_not_standalone_handler_reject);
  tcase_add_test(tc_basic, test_not_standalone_handler_accept);
  tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
  tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
  tcase_add_test(tc_basic, test_dtd_attr_handling);
  tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
  tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
  tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_good_cdata_ascii);
  tcase_add_test(tc_basic, test_good_cdata_utf16);
  tcase_add_test(tc_basic, test_good_cdata_utf16_le);
  tcase_add_test(tc_basic, test_long_cdata_utf16);
  tcase_add_test(tc_basic, test_multichar_cdata_utf16);
  tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
  tcase_add_test(tc_basic, test_bad_cdata);
  tcase_add_test(tc_basic, test_bad_cdata_utf16);
  tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_memory_allocation);
  tcase_add_test__if_xml_ge(tc_basic, test_default_current);
  tcase_add_test(tc_basic, test_dtd_elements);
  tcase_add_test(tc_basic, test_dtd_elements_nesting);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_foreign_dtd_without_external_subset);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
  tcase_add_test(tc_basic, test_set_base);
  tcase_add_test(tc_basic, test_attributes);
  tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
  tcase_add_test(tc_basic, test_resume_invalid_parse);
  tcase_add_test(tc_basic, test_resume_resuspended);
  tcase_add_test(tc_basic, test_cdata_default);
  tcase_add_test(tc_basic, test_subordinate_reset);
  tcase_add_test(tc_basic, test_subordinate_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_ext_entity_invalid_suspended_parse);
  tcase_add_test(tc_basic, test_explicit_encoding);
  tcase_add_test(tc_basic, test_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
  tcase_add_test(tc_basic, test_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
  tcase_add_test(tc_basic, test_empty_parse);
  tcase_add_test(tc_basic, test_negative_len_parse);
  tcase_add_test(tc_basic, test_negative_len_parse_buffer);
  tcase_add_test(tc_basic, test_get_buffer_1);
  tcase_add_test(tc_basic, test_get_buffer_2);
  /* Only registered when the build retains context bytes. */
#if XML_CONTEXT_BYTES > 0
  tcase_add_test(tc_basic, test_get_buffer_3_overflow);
#endif
  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
  tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
  tcase_add_test(tc_basic, test_byte_info_at_end);
  tcase_add_test(tc_basic, test_byte_info_at_error);
  tcase_add_test(tc_basic, test_byte_info_at_cdata);
  tcase_add_test(tc_basic, test_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
  tcase_add_test(tc_basic, test_not_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
  tcase_add_test(tc_basic, test_bad_public_doctype);
  tcase_add_test(tc_basic, test_attribute_enum_value);
  tcase_add_test(tc_basic, test_predefined_entity_redefinition);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
  tcase_add_test(tc_basic, test_public_notation_no_sysid);
  tcase_add_test(tc_basic, test_nested_groups);
  tcase_add_test(tc_basic, test_group_choice);
  tcase_add_test(tc_basic, test_standalone_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity_2);
  tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
  tcase_add_test(tc_basic, test_suspend_xdecl);
  tcase_add_test(tc_basic, test_abort_epilog);
  tcase_add_test(tc_basic, test_abort_epilog_2);
  tcase_add_test(tc_basic, test_suspend_epilog);
  tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
  tcase_add_test(tc_basic, test_unfinished_epilog);
  tcase_add_test(tc_basic, test_partial_char_in_epilog);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_suspend_resume_internal_entity_issue_629);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
  tcase_add_test(tc_basic, test_restart_on_error);
  tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
  tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
  tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
  tcase_add_test(tc_basic, test_standalone_internal_entity);
  tcase_add_test(tc_basic, test_skipped_external_entity);
  tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
  tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
  tcase_add_test(tc_basic, test_pi_handled_in_default);
  tcase_add_test(tc_basic, test_comment_handled_in_default);
  tcase_add_test(tc_basic, test_pi_yml);
  tcase_add_test(tc_basic, test_pi_xnl);
  tcase_add_test(tc_basic, test_pi_xmm);
  tcase_add_test(tc_basic, test_utf16_pi);
  tcase_add_test(tc_basic, test_utf16_be_pi);
  tcase_add_test(tc_basic, test_utf16_be_comment);
  tcase_add_test(tc_basic, test_utf16_le_comment);
  tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_unknown_encoding_success);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
  tcase_add_test(tc_basic, test_invalid_unknown_encoding);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
  tcase_add_test(tc_basic, test_utf8_in_start_tags);
  tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
  tcase_add_test(tc_basic, test_utf16_attribute);
  tcase_add_test(tc_basic, test_utf16_second_attr);
  tcase_add_test(tc_basic, test_attr_after_solidus);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
  tcase_add_test(tc_basic, test_bad_doctype);
  tcase_add_test(tc_basic, test_bad_doctype_utf8);
  tcase_add_test(tc_basic, test_bad_doctype_utf16);
  tcase_add_test(tc_basic, test_bad_doctype_plus);
  tcase_add_test(tc_basic, test_bad_doctype_star);
  tcase_add_test(tc_basic, test_bad_doctype_query);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
  tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
  tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
  tcase_add_test(tc_basic, test_short_doctype);
  tcase_add_test(tc_basic, test_short_doctype_2);
  tcase_add_test(tc_basic, test_short_doctype_3);
  tcase_add_test(tc_basic, test_long_doctype);
  tcase_add_test(tc_basic, test_bad_entity);
  tcase_add_test(tc_basic, test_bad_entity_2);
  tcase_add_test(tc_basic, test_bad_entity_3);
  tcase_add_test(tc_basic, test_bad_entity_4);
  tcase_add_test(tc_basic, test_bad_notation);
  tcase_add_test(tc_basic, test_default_doctype_handler);
  tcase_add_test(tc_basic, test_empty_element_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_pool_integrity_with_unfinished_attr);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
  /* Large-token scaling and the reparse deferral heuristic. */
  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
  tcase_add_test(tc_basic, test_set_reparse_deferral);
  tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
  tcase_add_test(tc_basic, test_set_bad_reparse_option);
  tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
  tcase_add_test(tc_basic, test_varying_buffer_fills);
}
6151