1 /* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23 Copyright (c) 2026 Francesco Bertolaccini
24 Licensed under the MIT license:
25
26 Permission is hereby granted, free of charge, to any person obtaining
27 a copy of this software and associated documentation files (the
28 "Software"), to deal in the Software without restriction, including
29 without limitation the rights to use, copy, modify, merge, publish,
30 distribute, sublicense, and/or sell copies of the Software, and to permit
31 persons to whom the Software is furnished to do so, subject to the
32 following conditions:
33
34 The above copyright notice and this permission notice shall be included
35 in all copies or substantial portions of the Software.
36
37 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
38 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
39 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
40 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
41 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
42 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
43 USE OR OTHER DEALINGS IN THE SOFTWARE.
44 */
45
46 #if defined(NDEBUG)
47 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
48 #endif
49
50 #include <assert.h>
51
52 #include <stdio.h>
53 #include <string.h>
54 #include <time.h>
55
56 #if ! defined(__cplusplus)
57 # include <stdbool.h>
58 #endif
59
60 #include "expat_config.h"
61
62 #include "expat.h"
63 #include "internal.h"
64 #include "minicheck.h"
65 #include "structdata.h"
66 #include "common.h"
67 #include "dummy.h"
68 #include "handlers.h"
69 #include "siphash.h"
70 #include "basic_tests.h"
71
72 static void
basic_setup(void)73 basic_setup(void) {
74 g_parser = XML_ParserCreate(NULL);
75 if (g_parser == NULL)
76 fail("Parser not created.");
77 }
78
79 /*
80 * Character & encoding tests.
81 */
82
START_TEST(test_nul_byte)83 START_TEST(test_nul_byte) {
84 char text[] = "<doc>\0</doc>";
85
86 /* test that a NUL byte (in US-ASCII data) is an error */
87 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
88 == XML_STATUS_OK)
89 fail("Parser did not report error on NUL-byte.");
90 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
91 xml_failure(g_parser);
92 }
93 END_TEST
94
START_TEST(test_u0000_char)95 START_TEST(test_u0000_char) {
96 /* test that a NUL byte (in US-ASCII data) is an error */
97 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF,
98 "Parser did not report error on NUL-byte.");
99 }
100 END_TEST
101
START_TEST(test_siphash_self)102 START_TEST(test_siphash_self) {
103 if (! sip24_valid())
104 fail("SipHash self-test failed");
105 }
106 END_TEST
107
START_TEST(test_siphash_spec)108 START_TEST(test_siphash_spec) {
109 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
110 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
111 "\x0a\x0b\x0c\x0d\x0e";
112 const size_t len = sizeof(message) - 1;
113 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
114 struct siphash state;
115 struct sipkey key;
116
117 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
118 "\x0a\x0b\x0c\x0d\x0e\x0f");
119 sip24_init(&state, &key);
120
121 /* Cover spread across calls */
122 sip24_update(&state, message, 4);
123 sip24_update(&state, message + 4, len - 4);
124
125 /* Cover null length */
126 sip24_update(&state, message, 0);
127
128 if (sip24_final(&state) != expected)
129 fail("sip24_final failed spec test\n");
130
131 /* Cover wrapper */
132 if (siphash24(message, len, &key) != expected)
133 fail("siphash24 failed spec test\n");
134 }
135 END_TEST
136
START_TEST(test_bom_utf8)137 START_TEST(test_bom_utf8) {
138 /* This test is really just making sure we don't core on a UTF-8 BOM. */
139 const char *text = "\357\273\277<e/>";
140
141 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
142 == XML_STATUS_ERROR)
143 xml_failure(g_parser);
144 }
145 END_TEST
146
START_TEST(test_bom_utf16_be)147 START_TEST(test_bom_utf16_be) {
148 char text[] = "\376\377\0<\0e\0/\0>";
149
150 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
151 == XML_STATUS_ERROR)
152 xml_failure(g_parser);
153 }
154 END_TEST
155
START_TEST(test_bom_utf16_le)156 START_TEST(test_bom_utf16_le) {
157 char text[] = "\377\376<\0e\0/\0>\0";
158
159 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
160 == XML_STATUS_ERROR)
161 xml_failure(g_parser);
162 }
163 END_TEST
164
START_TEST(test_nobom_utf16_le)165 START_TEST(test_nobom_utf16_le) {
166 char text[] = " \0<\0e\0/\0>\0";
167
168 if (g_chunkSize == 1) {
169 // TODO: with just the first byte, we can't tell the difference between
170 // UTF-16-LE and UTF-8. Avoid the failure for now.
171 return;
172 }
173
174 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
175 == XML_STATUS_ERROR)
176 xml_failure(g_parser);
177 }
178 END_TEST
179
START_TEST(test_hash_collision)180 START_TEST(test_hash_collision) {
181 /* For full coverage of the lookup routine, we need to ensure a
182 * hash collision even though we can only tell that we have one
183 * through breakpoint debugging or coverage statistics. The
184 * following will cause a hash collision on machines with a 64-bit
185 * long type; others will have to experiment. The full coverage
186 * tests invoked from qa.sh usually provide a hash collision, but
187 * not always. This is an attempt to provide insurance.
188 */
189 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
190 const char *text
191 = "<doc>\n"
192 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
193 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
194 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
195 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
196 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
197 "<d8>This triggers the table growth and collides with b2</d8>\n"
198 "</doc>\n";
199
200 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
201 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
202 == XML_STATUS_ERROR)
203 xml_failure(g_parser);
204 }
205 END_TEST
206 #undef COLLIDING_HASH_SALT
207
START_TEST(test_hash_salt_setter)208 START_TEST(test_hash_salt_setter) {
209 const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
210 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
211 XML_Parser parser = XML_ParserCreate(NULL);
212
213 // NULL parser should be rejected
214 assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE);
215
216 // NULL entropy should be rejected
217 assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE);
218
219 // Setting should be allowed more than once
220 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
221 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
222
223 // But not after parsing has started
224 assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */)
225 == XML_STATUS_OK);
226 assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE);
227
228 XML_ParserFree(parser);
229 }
230 END_TEST
231
232 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)233 START_TEST(test_danish_latin1) {
234 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
235 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
236 #ifdef XML_UNICODE
237 const XML_Char *expected
238 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
239 #else
240 const XML_Char *expected
241 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
242 #endif
243 run_character_check(text, expected);
244 }
245 END_TEST
246
247 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)248 START_TEST(test_french_charref_hexidecimal) {
249 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250 "<doc>éèàçêÈ</doc>";
251 #ifdef XML_UNICODE
252 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254 const XML_Char *expected
255 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257 run_character_check(text, expected);
258 }
259 END_TEST
260
START_TEST(test_french_charref_decimal)261 START_TEST(test_french_charref_decimal) {
262 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
263 "<doc>éèàçêÈ</doc>";
264 #ifdef XML_UNICODE
265 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
266 #else
267 const XML_Char *expected
268 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
269 #endif
270 run_character_check(text, expected);
271 }
272 END_TEST
273
START_TEST(test_french_latin1)274 START_TEST(test_french_latin1) {
275 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
276 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
277 #ifdef XML_UNICODE
278 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
279 #else
280 const XML_Char *expected
281 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
282 #endif
283 run_character_check(text, expected);
284 }
285 END_TEST
286
START_TEST(test_french_utf8)287 START_TEST(test_french_utf8) {
288 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
289 "<doc>\xC3\xA9</doc>";
290 #ifdef XML_UNICODE
291 const XML_Char *expected = XCS("\x00e9");
292 #else
293 const XML_Char *expected = XCS("\xC3\xA9");
294 #endif
295 run_character_check(text, expected);
296 }
297 END_TEST
298
299 /* Regression test for SF bug #600479.
300 XXX There should be a test that exercises all legal XML Unicode
301 characters as PCDATA and attribute value content, and XML Name
302 characters as part of element and attribute names.
303 */
START_TEST(test_utf8_false_rejection)304 START_TEST(test_utf8_false_rejection) {
305 const char *text = "<doc>\xEF\xBA\xBF</doc>";
306 #ifdef XML_UNICODE
307 const XML_Char *expected = XCS("\xfebf");
308 #else
309 const XML_Char *expected = XCS("\xEF\xBA\xBF");
310 #endif
311 run_character_check(text, expected);
312 }
313 END_TEST
314
315 /* Regression test for SF bug #477667.
316 This test assures that any 8-bit character followed by a 7-bit
317 character will not be mistakenly interpreted as a valid UTF-8
318 sequence.
319 */
START_TEST(test_illegal_utf8)320 START_TEST(test_illegal_utf8) {
321 char text[100];
322 int i;
323
324 for (i = 128; i <= 255; ++i) {
325 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
326 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
327 == XML_STATUS_OK) {
328 snprintf(text, sizeof(text),
329 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
330 i);
331 fail(text);
332 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
333 xml_failure(g_parser);
334 /* Reset the parser since we use the same parser repeatedly. */
335 XML_ParserReset(g_parser, NULL);
336 }
337 }
338 END_TEST
339
340 /* Examples, not masks: */
341 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
342 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
343 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
344 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
345 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
346
START_TEST(test_utf8_auto_align)347 START_TEST(test_utf8_auto_align) {
348 struct TestCase {
349 ptrdiff_t expectedMovementInChars;
350 const char *input;
351 };
352
353 struct TestCase cases[] = {
354 {00, ""},
355
356 {00, UTF8_LEAD_1},
357
358 {-1, UTF8_LEAD_2},
359 {00, UTF8_LEAD_2 UTF8_FOLLOW},
360
361 {-1, UTF8_LEAD_3},
362 {-2, UTF8_LEAD_3 UTF8_FOLLOW},
363 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
364
365 {-1, UTF8_LEAD_4},
366 {-2, UTF8_LEAD_4 UTF8_FOLLOW},
367 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
368 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
369 };
370
371 size_t i = 0;
372 bool success = true;
373 for (; i < sizeof(cases) / sizeof(*cases); i++) {
374 const char *fromLim = cases[i].input + strlen(cases[i].input);
375 const char *const fromLimInitially = fromLim;
376 ptrdiff_t actualMovementInChars;
377
378 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
379
380 actualMovementInChars = (fromLim - fromLimInitially);
381 if (actualMovementInChars != cases[i].expectedMovementInChars) {
382 size_t j = 0;
383 success = false;
384 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
385 ", actually moved by %2d chars: \"",
386 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
387 (int)actualMovementInChars);
388 for (; j < strlen(cases[i].input); j++) {
389 printf("\\x%02x", (unsigned char)cases[i].input[j]);
390 }
391 printf("\"\n");
392 }
393 }
394
395 if (! success) {
396 fail("UTF-8 auto-alignment is not bullet-proof\n");
397 }
398 }
399 END_TEST
400
START_TEST(test_utf16)401 START_TEST(test_utf16) {
402 /* <?xml version="1.0" encoding="UTF-16"?>
403 * <doc a='123'>some {A} text</doc>
404 *
405 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
406 */
407 char text[]
408 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
409 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
410 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
411 "\000'\000?\000>\000\n"
412 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
413 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
414 "<\000/\000d\000o\000c\000>";
415 #ifdef XML_UNICODE
416 const XML_Char *expected = XCS("some \xff21 text");
417 #else
418 const XML_Char *expected = XCS("some \357\274\241 text");
419 #endif
420 CharData storage;
421
422 CharData_Init(&storage);
423 XML_SetUserData(g_parser, &storage);
424 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
425 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
426 == XML_STATUS_ERROR)
427 xml_failure(g_parser);
428 CharData_CheckXMLChars(&storage, expected);
429 }
430 END_TEST
431
START_TEST(test_utf16_le_epilog_newline)432 START_TEST(test_utf16_le_epilog_newline) {
433 unsigned int first_chunk_bytes = 17;
434 char text[] = "\xFF\xFE" /* BOM */
435 "<\000e\000/\000>\000" /* document element */
436 "\r\000\n\000\r\000\n\000"; /* epilog */
437
438 if (first_chunk_bytes >= sizeof(text) - 1)
439 fail("bad value of first_chunk_bytes");
440 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE)
441 == XML_STATUS_ERROR)
442 xml_failure(g_parser);
443 else {
444 enum XML_Status rc;
445 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
446 (int)(sizeof(text) - first_chunk_bytes - 1),
447 XML_TRUE);
448 if (rc == XML_STATUS_ERROR)
449 xml_failure(g_parser);
450 }
451 }
452 END_TEST
453
454 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)455 START_TEST(test_not_utf16) {
456 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
457 "<doc>Hi</doc>";
458
459 /* Use a handler to provoke the appropriate code paths */
460 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
461 expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
462 "UTF-16 declared in UTF-8 not faulted");
463 }
464 END_TEST
465
466 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)467 START_TEST(test_bad_encoding) {
468 const char *text = "<doc>Hi</doc>";
469
470 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
471 fail("XML_SetEncoding failed");
472 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
473 "Unknown encoding not faulted");
474 }
475 END_TEST
476
477 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)478 START_TEST(test_latin1_umlauts) {
479 const char *text
480 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
481 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n"
482 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>";
483 #ifdef XML_UNICODE
484 /* Expected results in UTF-16 */
485 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
486 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
487 #else
488 /* Expected results in UTF-8 */
489 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
490 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
491 #endif
492
493 run_character_check(text, expected);
494 XML_ParserReset(g_parser, NULL);
495 run_attribute_check(text, expected);
496 /* Repeat with a default handler */
497 XML_ParserReset(g_parser, NULL);
498 XML_SetDefaultHandler(g_parser, dummy_default_handler);
499 run_character_check(text, expected);
500 XML_ParserReset(g_parser, NULL);
501 XML_SetDefaultHandler(g_parser, dummy_default_handler);
502 run_attribute_check(text, expected);
503 }
504 END_TEST
505
506 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)507 START_TEST(test_long_utf8_character) {
508 const char *text
509 = "<?xml version='1.0' encoding='utf-8'?>\n"
510 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
511 "<do\xf0\x90\x80\x80/>";
512 expect_failure(text, XML_ERROR_INVALID_TOKEN,
513 "4-byte UTF-8 character in element name not faulted");
514 }
515 END_TEST
516
517 /* Test that a long latin-1 attribute (too long to convert in one go)
518 * is correctly converted
519 */
START_TEST(test_long_latin1_attribute)520 START_TEST(test_long_latin1_attribute) {
521 const char *text
522 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
523 "<doc att='"
524 /* 64 characters per line */
525 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
526 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
527 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
528 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
529 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
530 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
531 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
532 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
533 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
534 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
535 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
536 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
537 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
538 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
539 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
540 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
541 /* Last character splits across a buffer boundary */
542 "\xe4'>\n</doc>";
543
544 const XML_Char *expected =
545 /* 64 characters per line */
546 /* clang-format off */
547 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
548 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
549 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
550 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
551 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
552 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
553 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
554 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
555 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
556 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
557 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
558 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
559 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
560 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
561 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
562 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
563 /* clang-format on */
564 #ifdef XML_UNICODE
565 XCS("\x00e4");
566 #else
567 XCS("\xc3\xa4");
568 #endif
569
570 run_attribute_check(text, expected);
571 }
572 END_TEST
573
574 /* Test that a long ASCII attribute (too long to convert in one go)
575 * is correctly converted
576 */
START_TEST(test_long_ascii_attribute)577 START_TEST(test_long_ascii_attribute) {
578 const char *text
579 = "<?xml version='1.0' encoding='us-ascii'?>\n"
580 "<doc att='"
581 /* 64 characters per line */
582 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
583 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
584 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
585 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
586 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
587 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
588 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
589 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
590 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
591 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
592 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
593 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
594 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
595 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
596 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
597 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
598 "01234'>\n</doc>";
599 const XML_Char *expected =
600 /* 64 characters per line */
601 /* clang-format off */
602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
604 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
605 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
606 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
607 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
608 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
609 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
610 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
611 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
612 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
613 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
614 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
615 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
616 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
617 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
618 XCS("01234");
619 /* clang-format on */
620
621 run_attribute_check(text, expected);
622 }
623 END_TEST
624
625 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)626 START_TEST(test_line_number_after_parse) {
627 const char *text = "<tag>\n"
628 "\n"
629 "\n</tag>";
630 XML_Size lineno;
631
632 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
633 == XML_STATUS_ERROR)
634 xml_failure(g_parser);
635 lineno = XML_GetCurrentLineNumber(g_parser);
636 if (lineno != 4) {
637 char buffer[100];
638 snprintf(buffer, sizeof(buffer),
639 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
640 fail(buffer);
641 }
642 }
643 END_TEST
644
645 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)646 START_TEST(test_column_number_after_parse) {
647 const char *text = "<tag></tag>";
648 XML_Size colno;
649
650 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
651 == XML_STATUS_ERROR)
652 xml_failure(g_parser);
653 colno = XML_GetCurrentColumnNumber(g_parser);
654 if (colno != 11) {
655 char buffer[100];
656 snprintf(buffer, sizeof(buffer),
657 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
658 fail(buffer);
659 }
660 }
661 END_TEST
662
663 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)664 START_TEST(test_line_and_column_numbers_inside_handlers) {
665 const char *text = "<a>\n" /* Unix end-of-line */
666 " <b>\r\n" /* Windows end-of-line */
667 " <c/>\r" /* Mac OS end-of-line */
668 " </b>\n"
669 " <d>\n"
670 " <f/>\n"
671 " </d>\n"
672 "</a>";
673 const StructDataEntry expected[]
674 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
675 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
676 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
677 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
678 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
679 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
680 StructData storage;
681
682 StructData_Init(&storage);
683 XML_SetUserData(g_parser, &storage);
684 XML_SetStartElementHandler(g_parser, start_element_event_handler2);
685 XML_SetEndElementHandler(g_parser, end_element_event_handler2);
686 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
687 == XML_STATUS_ERROR)
688 xml_failure(g_parser);
689
690 StructData_CheckItems(&storage, expected, expected_count);
691 StructData_Dispose(&storage);
692 }
693 END_TEST
694
695 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)696 START_TEST(test_line_number_after_error) {
697 const char *text = "<a>\n"
698 " <b>\n"
699 " </a>"; /* missing </b> */
700 XML_Size lineno;
701 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
702 != XML_STATUS_ERROR)
703 fail("Expected a parse error");
704
705 lineno = XML_GetCurrentLineNumber(g_parser);
706 if (lineno != 3) {
707 char buffer[100];
708 snprintf(buffer, sizeof(buffer),
709 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
710 fail(buffer);
711 }
712 }
713 END_TEST
714
715 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)716 START_TEST(test_column_number_after_error) {
717 const char *text = "<a>\n"
718 " <b>\n"
719 " </a>"; /* missing </b> */
720 XML_Size colno;
721 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
722 != XML_STATUS_ERROR)
723 fail("Expected a parse error");
724
725 colno = XML_GetCurrentColumnNumber(g_parser);
726 if (colno != 4) {
727 char buffer[100];
728 snprintf(buffer, sizeof(buffer),
729 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
730 fail(buffer);
731 }
732 }
733 END_TEST
734
735 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)736 START_TEST(test_really_long_lines) {
737 /* This parses an input line longer than INIT_DATA_BUF_SIZE
738 characters long (defined to be 1024 in xmlparse.c). We take a
739 really cheesy approach to building the input buffer, because
740 this avoids writing bugs in buffer-filling code.
741 */
742 const char *text
743 = "<e>"
744 /* 64 chars */
745 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
746 /* until we have at least 1024 characters on the line: */
747 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
748 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
749 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
750 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
751 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
752 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
753 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
754 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763 "</e>";
764 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
765 == XML_STATUS_ERROR)
766 xml_failure(g_parser);
767 }
768 END_TEST
769
770 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)771 START_TEST(test_really_long_encoded_lines) {
772 /* As above, except that we want to provoke an output buffer
773 * overflow with a non-trivial encoding. For this we need to pass
774 * the whole cdata in one go, not byte-by-byte.
775 */
776 void *buffer;
777 const char *text
778 = "<?xml version='1.0' encoding='iso-8859-1'?>"
779 "<e>"
780 /* 64 chars */
781 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
782 /* until we have at least 1024 characters on the line: */
783 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
784 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
785 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
786 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
787 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
788 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
789 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
790 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
791 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
792 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
793 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
794 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
795 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
796 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
797 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
798 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
799 "</e>";
800 int parse_len = (int)strlen(text);
801
802 /* Need a cdata handler to provoke the code path we want to test */
803 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
804 buffer = XML_GetBuffer(g_parser, parse_len);
805 if (buffer == NULL)
806 fail("Could not allocate parse buffer");
807 assert(buffer != NULL);
808 memcpy(buffer, text, parse_len);
809 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
810 xml_failure(g_parser);
811 }
812 END_TEST
813
814 /*
815 * Element event tests.
816 */
817
START_TEST(test_end_element_events)818 START_TEST(test_end_element_events) {
819 const char *text = "<a><b><c/></b><d><f/></d></a>";
820 const XML_Char *expected = XCS("/c/b/f/d/a");
821 CharData storage;
822
823 CharData_Init(&storage);
824 XML_SetUserData(g_parser, &storage);
825 XML_SetEndElementHandler(g_parser, end_element_event_handler);
826 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
827 == XML_STATUS_ERROR)
828 xml_failure(g_parser);
829 CharData_CheckXMLChars(&storage, expected);
830 }
831 END_TEST
832
833 /*
834 * Attribute tests.
835 */
836
837 /* Helper used by the following tests; this checks any "attr" and "refs"
838 attributes to make sure whitespace has been normalized.
839
840 Return true if whitespace has been normalized in a string, using
841 the rules for attribute value normalization. The 'is_cdata' flag
842 is needed since CDATA attributes don't need to have multiple
843 whitespace characters collapsed to a single space, while other
844 attribute data types do. (Section 3.3.3 of the recommendation.)
845 */
846 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)847 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
848 int blanks = 0;
849 int at_start = 1;
850 while (*s) {
851 if (*s == XCS(' '))
852 ++blanks;
853 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
854 return 0;
855 else {
856 if (at_start) {
857 at_start = 0;
858 if (blanks && ! is_cdata)
859 /* illegal leading blanks */
860 return 0;
861 } else if (blanks > 1 && ! is_cdata)
862 return 0;
863 blanks = 0;
864 }
865 ++s;
866 }
867 if (blanks && ! is_cdata)
868 return 0;
869 return 1;
870 }
871
872 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)873 START_TEST(test_helper_is_whitespace_normalized) {
874 assert(is_whitespace_normalized(XCS("abc"), 0));
875 assert(is_whitespace_normalized(XCS("abc"), 1));
876 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
877 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
878 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
879 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
880 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
881 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
882 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
883 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
884 assert(! is_whitespace_normalized(XCS(" "), 0));
885 assert(is_whitespace_normalized(XCS(" "), 1));
886 assert(! is_whitespace_normalized(XCS("\t"), 0));
887 assert(! is_whitespace_normalized(XCS("\t"), 1));
888 assert(! is_whitespace_normalized(XCS("\n"), 0));
889 assert(! is_whitespace_normalized(XCS("\n"), 1));
890 assert(! is_whitespace_normalized(XCS("\r"), 0));
891 assert(! is_whitespace_normalized(XCS("\r"), 1));
892 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
893 }
894 END_TEST
895
896 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)897 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
898 const XML_Char **atts) {
899 int i;
900 UNUSED_P(userData);
901 UNUSED_P(name);
902 for (i = 0; atts[i] != NULL; i += 2) {
903 const XML_Char *attrname = atts[i];
904 const XML_Char *value = atts[i + 1];
905 if (xcstrcmp(XCS("attr"), attrname) == 0
906 || xcstrcmp(XCS("ents"), attrname) == 0
907 || xcstrcmp(XCS("refs"), attrname) == 0) {
908 if (! is_whitespace_normalized(value, 0)) {
909 char buffer[256];
910 snprintf(buffer, sizeof(buffer),
911 "attribute value not normalized: %" XML_FMT_STR
912 "='%" XML_FMT_STR "'",
913 attrname, value);
914 fail(buffer);
915 }
916 }
917 }
918 }
919
START_TEST(test_attr_whitespace_normalization)920 START_TEST(test_attr_whitespace_normalization) {
921 const char *text
922 = "<!DOCTYPE doc [\n"
923 " <!ATTLIST doc\n"
924 " attr NMTOKENS #REQUIRED\n"
925 " ents ENTITIES #REQUIRED\n"
926 " refs IDREFS #REQUIRED>\n"
927 "]>\n"
928 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
929 " ents=' ent-1 \t\r\n"
930 " ent-2 ' >\n"
931 " <e id='id-1'/>\n"
932 " <e id='id-2'/>\n"
933 "</doc>";
934
935 XML_SetStartElementHandler(g_parser,
936 check_attr_contains_normalized_whitespace);
937 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
938 == XML_STATUS_ERROR)
939 xml_failure(g_parser);
940 }
941 END_TEST
942
943 /*
944 * XML declaration tests.
945 */
946
START_TEST(test_xmldecl_misplaced)947 START_TEST(test_xmldecl_misplaced) {
948 expect_failure("\n"
949 "<?xml version='1.0'?>\n"
950 "<a/>",
951 XML_ERROR_MISPLACED_XML_PI,
952 "failed to report misplaced XML declaration");
953 }
954 END_TEST
955
START_TEST(test_xmldecl_invalid)956 START_TEST(test_xmldecl_invalid) {
957 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
958 "Failed to report invalid XML declaration");
959 }
960 END_TEST
961
START_TEST(test_xmldecl_missing_attr)962 START_TEST(test_xmldecl_missing_attr) {
963 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
964 "Failed to report missing XML declaration attribute");
965 }
966 END_TEST
967
START_TEST(test_xmldecl_missing_value)968 START_TEST(test_xmldecl_missing_value) {
969 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
970 "<doc/>",
971 XML_ERROR_XML_DECL,
972 "Failed to report missing attribute value");
973 }
974 END_TEST
975
976 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)977 START_TEST(test_unknown_encoding_internal_entity) {
978 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
979 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
980 "<test a='&foo;'/>";
981
982 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
983 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
984 == XML_STATUS_ERROR)
985 xml_failure(g_parser);
986 }
987 END_TEST
988
989 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)990 START_TEST(test_unrecognised_encoding_internal_entity) {
991 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
992 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
993 "<test a='&foo;'/>";
994
995 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
996 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
997 != XML_STATUS_ERROR)
998 fail("Unrecognised encoding not rejected");
999 }
1000 END_TEST
1001
1002 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)1003 START_TEST(test_ext_entity_set_encoding) {
1004 const char *text = "<!DOCTYPE doc [\n"
1005 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1006 "]>\n"
1007 "<doc>&en;</doc>";
1008 ExtTest test_data
1009 = {/* This text says it's an unsupported encoding, but it's really
1010 UTF-8, which we tell Expat using XML_SetEncoding().
1011 */
1012 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
1013 #ifdef XML_UNICODE
1014 const XML_Char *expected = XCS("\x00e9");
1015 #else
1016 const XML_Char *expected = XCS("\xc3\xa9");
1017 #endif
1018
1019 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1020 run_ext_character_check(text, &test_data, expected);
1021 }
1022 END_TEST
1023
1024 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)1025 START_TEST(test_ext_entity_no_handler) {
1026 const char *text = "<!DOCTYPE doc [\n"
1027 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1028 "]>\n"
1029 "<doc>&en;</doc>";
1030
1031 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1032 run_character_check(text, XCS(""));
1033 }
1034 END_TEST
1035
1036 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1037 START_TEST(test_ext_entity_set_bom) {
1038 const char *text = "<!DOCTYPE doc [\n"
1039 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1040 "]>\n"
1041 "<doc>&en;</doc>";
1042 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1043 "<?xml encoding='iso-8859-3'?>"
1044 "\xC3\xA9",
1045 XCS("utf-8"), NULL};
1046 #ifdef XML_UNICODE
1047 const XML_Char *expected = XCS("\x00e9");
1048 #else
1049 const XML_Char *expected = XCS("\xc3\xa9");
1050 #endif
1051
1052 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1053 run_ext_character_check(text, &test_data, expected);
1054 }
1055 END_TEST
1056
1057 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1058 START_TEST(test_ext_entity_bad_encoding) {
1059 const char *text = "<!DOCTYPE doc [\n"
1060 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1061 "]>\n"
1062 "<doc>&en;</doc>";
1063 ExtFaults fault
1064 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1065 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1066
1067 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1068 XML_SetUserData(g_parser, &fault);
1069 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1070 "Bad encoding should not have been accepted");
1071 }
1072 END_TEST
1073
1074 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1075 START_TEST(test_ext_entity_bad_encoding_2) {
1076 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1077 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1078 "<doc>&entity;</doc>";
1079 ExtFaults fault
1080 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1081 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1082
1083 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1084 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1085 XML_SetUserData(g_parser, &fault);
1086 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1087 "Bad encoding not faulted in external entity handler");
1088 }
1089 END_TEST
1090
1091 /* Test that no error is reported for unknown entities if we don't
1092 read an external subset. This was fixed in Expat 1.95.5.
1093 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1094 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1095 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1096 "<doc>&entity;</doc>";
1097
1098 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1099 == XML_STATUS_ERROR)
1100 xml_failure(g_parser);
1101 }
1102 END_TEST
1103
1104 /* Test that an error is reported for unknown entities if we don't
1105 have an external subset.
1106 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1107 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1108 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1109 "Parser did not report undefined entity w/out a DTD.");
1110 }
1111 END_TEST
1112
1113 /* Test that an error is reported for unknown entities if we don't
1114 read an external subset, but have been declared standalone.
1115 */
START_TEST(test_wfc_undeclared_entity_standalone)1116 START_TEST(test_wfc_undeclared_entity_standalone) {
1117 const char *text
1118 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1119 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1120 "<doc>&entity;</doc>";
1121
1122 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1123 "Parser did not report undefined entity (standalone).");
1124 }
1125 END_TEST
1126
1127 /* Test that an error is reported for unknown entities if we have read
1128 an external subset, and standalone is true.
1129 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1130 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1131 const char *text
1132 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1133 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1134 "<doc>&entity;</doc>";
1135 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1136
1137 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1138 XML_SetUserData(g_parser, &test_data);
1139 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1140 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1141 "Parser did not report undefined entity (external DTD).");
1142 }
1143 END_TEST
1144
1145 /* Test that external entity handling is not done if the parsing flag
1146 * is set to UNLESS_STANDALONE
1147 */
START_TEST(test_entity_with_external_subset_unless_standalone)1148 START_TEST(test_entity_with_external_subset_unless_standalone) {
1149 const char *text
1150 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1151 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1152 "<doc>&entity;</doc>";
1153 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1154
1155 XML_SetParamEntityParsing(g_parser,
1156 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1157 XML_SetUserData(g_parser, &test_data);
1158 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1159 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1160 "Parser did not report undefined entity");
1161 }
1162 END_TEST
1163
1164 /* Test that no error is reported for unknown entities if we have read
1165 an external subset, and standalone is false.
1166 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1167 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1168 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1169 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1170 "<doc>&entity;</doc>";
1171 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1172
1173 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1174 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1175 run_ext_character_check(text, &test_data, XCS(""));
1176 }
1177 END_TEST
1178
1179 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1180 START_TEST(test_not_standalone_handler_reject) {
1181 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1182 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1183 "<doc>&entity;</doc>";
1184 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1185
1186 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1187 XML_SetUserData(g_parser, &test_data);
1188 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1189 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1190 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1191 "NotStandalone handler failed to reject");
1192
1193 /* Try again but without external entity handling */
1194 XML_ParserReset(g_parser, NULL);
1195 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1196 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1197 "NotStandalone handler failed to reject");
1198 }
1199 END_TEST
1200
1201 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1202 START_TEST(test_not_standalone_handler_accept) {
1203 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1204 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1205 "<doc>&entity;</doc>";
1206 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1207
1208 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1209 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1210 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1211 run_ext_character_check(text, &test_data, XCS(""));
1212
1213 /* Repeat without the external entity handler */
1214 XML_ParserReset(g_parser, NULL);
1215 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1216 run_character_check(text, XCS(""));
1217 }
1218 END_TEST
1219
/* Parse, with a parser created just for this test rather than g_parser, a
   document in which the internal entity e1 is referenced from <t2>, an
   element nested inside the root element <t1>.  The parse is required to
   return XML_STATUS_OK. */
START_TEST(test_entity_start_tag_level_greater_than_one) {
  const char *const text = "<!DOCTYPE t1 [\n"
                           " <!ENTITY e1 'hello'>\n"
                           "]>\n"
                           "<t1>\n"
                           " <t2>&e1;</t2>\n"
                           "</t1>\n";

  XML_Parser parser = XML_ParserCreate(NULL);
  assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
                                      /*isFinal*/ XML_TRUE)
              == XML_STATUS_OK);
  /* The parser is local to this test, so it is released here */
  XML_ParserFree(parser);
}
END_TEST
1235
START_TEST(test_wfc_no_recursive_entity_refs)1236 START_TEST(test_wfc_no_recursive_entity_refs) {
1237 const char *text = "<!DOCTYPE doc [\n"
1238 " <!ENTITY entity '&entity;'>\n"
1239 "]>\n"
1240 "<doc>&entity;</doc>";
1241
1242 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1243 "Parser did not report recursive entity reference.");
1244 }
1245 END_TEST
1246
START_TEST(test_no_indirectly_recursive_entity_refs)1247 START_TEST(test_no_indirectly_recursive_entity_refs) {
1248 struct TestCase {
1249 const char *doc;
1250 bool usesParameterEntities;
1251 };
1252
1253 const struct TestCase cases[] = {
1254 // general entity + character data
1255 {"<!DOCTYPE a [\n"
1256 " <!ENTITY e1 '&e2;'>\n"
1257 " <!ENTITY e2 '&e1;'>\n"
1258 "]><a>&e2;</a>\n",
1259 false},
1260
1261 // general entity + attribute value
1262 {"<!DOCTYPE a [\n"
1263 " <!ENTITY e1 '&e2;'>\n"
1264 " <!ENTITY e2 '&e1;'>\n"
1265 "]><a k1='&e2;' />\n",
1266 false},
1267
1268 // parameter entity
1269 {"<!DOCTYPE doc [\n"
1270 " <!ENTITY % p1 '%p2;'>\n"
1271 " <!ENTITY % p2 '%p1;'>\n"
1272 " <!ENTITY % define_g \"<!ENTITY g '%p2;'>\">\n"
1273 " %define_g;\n"
1274 "]>\n"
1275 "<doc/>\n",
1276 true},
1277 };
1278 const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
1279
1280 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1281 for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
1282 j++) {
1283 const XML_Bool reset_wanted = reset_or_not[j];
1284 const char *const doc = cases[i].doc;
1285 const bool usesParameterEntities = cases[i].usesParameterEntities;
1286
1287 set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
1288
1289 #ifdef XML_DTD // both GE and DTD
1290 const bool rejection_expected = true;
1291 #elif XML_GE == 1 // GE but not DTD
1292 const bool rejection_expected = ! usesParameterEntities;
1293 #else // neither DTD nor GE
1294 const bool rejection_expected = false;
1295 #endif
1296
1297 XML_Parser parser = XML_ParserCreate(NULL);
1298
1299 #ifdef XML_DTD
1300 if (usesParameterEntities) {
1301 assert_true(
1302 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
1303 == 1);
1304 }
1305 #else
1306 UNUSED_P(usesParameterEntities);
1307 #endif // XML_DTD
1308
1309 const enum XML_Status status
1310 = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
1311 /*isFinal*/ XML_TRUE);
1312
1313 if (rejection_expected) {
1314 assert_true(status == XML_STATUS_ERROR);
1315 assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
1316 } else {
1317 assert_true(status == XML_STATUS_OK);
1318 }
1319
1320 if (reset_wanted) {
1321 // This covers free'ing of (eventually) all three open entity lists by
1322 // XML_ParserReset.
1323 XML_ParserReset(parser, NULL);
1324 }
1325
1326 // This covers free'ing of (eventually) all three open entity lists by
1327 // XML_ParserFree (unless XML_ParserReset has already done that above).
1328 XML_ParserFree(parser);
1329 }
1330 }
1331 }
1332 END_TEST
1333
START_TEST(test_recursive_external_parameter_entity_2)1334 START_TEST(test_recursive_external_parameter_entity_2) {
1335 struct TestCase {
1336 const char *doc;
1337 enum XML_Status expectedStatus;
1338 };
1339
1340 struct TestCase cases[] = {
1341 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1342 {"<!ENTITY % p1 '%p1;'>"
1343 "<!ENTITY % p1 'first declaration wins'>",
1344 XML_STATUS_ERROR},
1345 {"<!ENTITY % p1 'first declaration wins'>"
1346 "<!ENTITY % p1 '%p1;'>",
1347 XML_STATUS_OK},
1348 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK},
1349 };
1350
1351 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1352 const char *const doc = cases[i].doc;
1353 const enum XML_Status expectedStatus = cases[i].expectedStatus;
1354 set_subtest("%s", doc);
1355
1356 XML_Parser parser = XML_ParserCreate(NULL);
1357 assert_true(parser != NULL);
1358
1359 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1360 assert_true(ext_parser != NULL);
1361
1362 const enum XML_Status actualStatus
1363 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1364
1365 assert_true(actualStatus == expectedStatus);
1366 if (actualStatus != XML_STATUS_OK) {
1367 assert_true(XML_GetErrorCode(ext_parser)
1368 == XML_ERROR_RECURSIVE_ENTITY_REF);
1369 }
1370
1371 XML_ParserFree(ext_parser);
1372 XML_ParserFree(parser);
1373 }
1374 }
1375 END_TEST
1376
1377 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1378 START_TEST(test_ext_entity_invalid_parse) {
1379 const char *text = "<!DOCTYPE doc [\n"
1380 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1381 "]>\n"
1382 "<doc>&en;</doc>";
1383 const ExtFaults faults[]
1384 = {{"<", "Incomplete element declaration not faulted", NULL,
1385 XML_ERROR_UNCLOSED_TOKEN},
1386 {"<\xe2\x82", /* First two bytes of a three-byte char */
1387 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1388 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1389 XML_ERROR_PARTIAL_CHAR},
1390 {NULL, NULL, NULL, XML_ERROR_NONE}};
1391 const ExtFaults *fault = faults;
1392
1393 for (; fault->parse_text != NULL; fault++) {
1394 set_subtest("\"%s\"", fault->parse_text);
1395 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1396 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1397 XML_SetUserData(g_parser, (void *)fault);
1398 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1399 "Parser did not report external entity error");
1400 XML_ParserReset(g_parser, NULL);
1401 }
1402 }
1403 END_TEST
1404
1405 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1406 START_TEST(test_dtd_default_handling) {
1407 const char *text = "<!DOCTYPE doc [\n"
1408 "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1409 "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1410 "<!ELEMENT doc EMPTY>\n"
1411 "<!ATTLIST doc a CDATA #IMPLIED>\n"
1412 "<?pi in dtd?>\n"
1413 "<!--comment in dtd-->\n"
1414 "]><doc/>";
1415
1416 XML_SetDefaultHandler(g_parser, accumulate_characters);
1417 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1418 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1419 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1420 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1421 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1422 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1423 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1424 XML_SetCommentHandler(g_parser, dummy_comment_handler);
1425 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1426 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1427 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1428 }
1429 END_TEST
1430
1431 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1432 START_TEST(test_dtd_attr_handling) {
1433 const char *prolog = "<!DOCTYPE doc [\n"
1434 "<!ELEMENT doc EMPTY>\n";
1435 AttTest attr_data[]
1436 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1437 "]>"
1438 "<doc a='two'/>",
1439 XCS("doc"), XCS("a"),
1440 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1441 NULL, XML_TRUE},
1442 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1443 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1444 "]>"
1445 "<doc/>",
1446 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1447 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1448 "]>"
1449 "<doc/>",
1450 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1451 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1452 "]>"
1453 "<doc/>",
1454 XCS("doc"), XCS("a"), XCS("CDATA"),
1455 #ifdef XML_UNICODE
1456 XCS("\x06f2"),
1457 #else
1458 XCS("\xdb\xb2"),
1459 #endif
1460 XML_FALSE},
1461 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1462 AttTest *test;
1463
1464 for (test = attr_data; test->definition != NULL; test++) {
1465 set_subtest("%s", test->definition);
1466 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1467 XML_SetUserData(g_parser, test);
1468 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1469 XML_FALSE)
1470 == XML_STATUS_ERROR)
1471 xml_failure(g_parser);
1472 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1473 (int)strlen(test->definition), XML_TRUE)
1474 == XML_STATUS_ERROR)
1475 xml_failure(g_parser);
1476 XML_ParserReset(g_parser, NULL);
1477 }
1478 }
1479 END_TEST
1480
1481 /* See related SF bug #673791.
1482 When namespace processing is enabled, setting the namespace URI for
1483 a prefix is not allowed; this test ensures that it *is* allowed
1484 when namespace processing is not enabled.
1485 (See Namespaces in XML, section 2.)
1486 */
START_TEST(test_empty_ns_without_namespaces)1487 START_TEST(test_empty_ns_without_namespaces) {
1488 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1489 " <e xmlns:prefix=''/>\n"
1490 "</doc>";
1491
1492 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1493 == XML_STATUS_ERROR)
1494 xml_failure(g_parser);
1495 }
1496 END_TEST
1497
1498 /* Regression test for SF bug #824420.
1499 Checks that an xmlns:prefix attribute set in an attribute's default
1500 value isn't misinterpreted.
1501 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1502 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1503 const char *text = "<!DOCTYPE e:element [\n"
1504 " <!ATTLIST e:element\n"
1505 " xmlns:e CDATA 'http://example.org/'>\n"
1506 " ]>\n"
1507 "<e:element/>";
1508
1509 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1510 == XML_STATUS_ERROR)
1511 xml_failure(g_parser);
1512 }
1513 END_TEST
1514
1515 /* Regression test for SF bug #1515266: missing check of stopped
1516 parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1517 START_TEST(test_stop_parser_between_char_data_calls) {
1518 /* The sample data must be big enough that there are two calls to
1519 the character data handler from within the inner "for" loop of
1520 the XML_TOK_DATA_CHARS case in doContent(), and the character
1521 handler must stop the parser and clear the character data
1522 handler.
1523 */
1524 const char *text = long_character_data_text;
1525
1526 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1527 g_resumable = XML_FALSE;
1528 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1529 != XML_STATUS_ERROR)
1530 xml_failure(g_parser);
1531 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1532 xml_failure(g_parser);
1533 }
1534 END_TEST
1535
1536 /* Regression test for SF bug #1515266: missing check of stopped
1537 parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1538 START_TEST(test_suspend_parser_between_char_data_calls) {
1539 /* The sample data must be big enough that there are two calls to
1540 the character data handler from within the inner "for" loop of
1541 the XML_TOK_DATA_CHARS case in doContent(), and the character
1542 handler must stop the parser and clear the character data
1543 handler.
1544 */
1545 const char *text = long_character_data_text;
1546
1547 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1548 g_resumable = XML_TRUE;
1549 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1550 // we won't know exactly how much input we actually managed to give Expat.
1551 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1552 != XML_STATUS_SUSPENDED)
1553 xml_failure(g_parser);
1554 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1555 xml_failure(g_parser);
1556 /* Try parsing directly */
1557 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1558 != XML_STATUS_ERROR)
1559 fail("Attempt to continue parse while suspended not faulted");
1560 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1561 fail("Suspended parse not faulted with correct error");
1562 }
1563 END_TEST
1564
1565 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1566 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1567 const char *text = long_character_data_text;
1568
1569 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1570 g_resumable = XML_FALSE;
1571 g_abortable = XML_FALSE;
1572 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1573 != XML_STATUS_ERROR)
1574 fail("Failed to double-stop parser");
1575
1576 XML_ParserReset(g_parser, NULL);
1577 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1578 g_resumable = XML_TRUE;
1579 g_abortable = XML_FALSE;
1580 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1581 // we won't know exactly how much input we actually managed to give Expat.
1582 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1583 != XML_STATUS_SUSPENDED)
1584 fail("Failed to double-suspend parser");
1585
1586 XML_ParserReset(g_parser, NULL);
1587 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1588 g_resumable = XML_TRUE;
1589 g_abortable = XML_TRUE;
1590 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1591 != XML_STATUS_ERROR)
1592 fail("Failed to suspend-abort parser");
1593 }
1594 END_TEST
1595
START_TEST(test_good_cdata_ascii)1596 START_TEST(test_good_cdata_ascii) {
1597 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1598 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1599
1600 CharData storage;
1601 CharData_Init(&storage);
1602 XML_SetUserData(g_parser, &storage);
1603 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1604 /* Add start and end handlers for coverage */
1605 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1606 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1607
1608 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1609 == XML_STATUS_ERROR)
1610 xml_failure(g_parser);
1611 CharData_CheckXMLChars(&storage, expected);
1612
1613 /* Try again, this time with a default handler */
1614 XML_ParserReset(g_parser, NULL);
1615 CharData_Init(&storage);
1616 XML_SetUserData(g_parser, &storage);
1617 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1618 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1619
1620 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1621 == XML_STATUS_ERROR)
1622 xml_failure(g_parser);
1623 CharData_CheckXMLChars(&storage, expected);
1624 }
1625 END_TEST
1626
START_TEST(test_good_cdata_utf16)1627 START_TEST(test_good_cdata_utf16) {
1628 /* Test data is:
1629 * <?xml version='1.0' encoding='utf-16'?>
1630 * <a><![CDATA[hello]]></a>
1631 */
1632 const char text[]
1633 = "\0<\0?\0x\0m\0l\0"
1634 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1635 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1636 "1\0"
1637 "6\0'"
1638 "\0?\0>\0\n"
1639 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1640 const XML_Char *expected = XCS("hello");
1641
1642 CharData storage;
1643 CharData_Init(&storage);
1644 XML_SetUserData(g_parser, &storage);
1645 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1646
1647 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1648 == XML_STATUS_ERROR)
1649 xml_failure(g_parser);
1650 CharData_CheckXMLChars(&storage, expected);
1651 }
1652 END_TEST
1653
START_TEST(test_good_cdata_utf16_le)1654 START_TEST(test_good_cdata_utf16_le) {
1655 /* Test data is:
1656 * <?xml version='1.0' encoding='utf-16'?>
1657 * <a><![CDATA[hello]]></a>
1658 */
1659 const char text[]
1660 = "<\0?\0x\0m\0l\0"
1661 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1662 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1663 "1\0"
1664 "6\0'"
1665 "\0?\0>\0\n"
1666 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1667 const XML_Char *expected = XCS("hello");
1668
1669 CharData storage;
1670 CharData_Init(&storage);
1671 XML_SetUserData(g_parser, &storage);
1672 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1673
1674 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1675 == XML_STATUS_ERROR)
1676 xml_failure(g_parser);
1677 CharData_CheckXMLChars(&storage, expected);
1678 }
1679 END_TEST
1680
1681 /* Test UTF16 conversion of a long cdata string */
1682
/* 16 characters: the letters 'A' to 'P', each written as a zero byte
 * followed by the ASCII byte (32 bytes in all).  Handy macro to reduce
 * visual clutter
 */
#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1685
START_TEST(test_long_cdata_utf16)1686 START_TEST(test_long_cdata_utf16) {
1687 /* Test data is:
1688 * <?xlm version='1.0' encoding='utf-16'?>
1689 * <a><![CDATA[
1690 * ABCDEFGHIJKLMNOP
1691 * ]]></a>
1692 */
1693 const char text[]
1694 = "\0<\0?\0x\0m\0l\0 "
1695 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1696 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1697 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1698 /* 64 characters per line */
1699 /* clang-format off */
1700 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1701 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1702 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1703 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1704 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1705 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1706 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1707 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1708 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1709 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1710 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1711 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1712 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1713 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1714 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1715 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1716 A_TO_P_IN_UTF16
1717 /* clang-format on */
1718 "\0]\0]\0>\0<\0/\0a\0>";
1719 const XML_Char *expected =
1720 /* clang-format off */
1721 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1722 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1723 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1724 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1725 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1726 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1727 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1728 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1729 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1730 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1731 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1732 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1733 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1734 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1735 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1736 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1737 XCS("ABCDEFGHIJKLMNOP");
1738 /* clang-format on */
1739 CharData storage;
1740 void *buffer;
1741
1742 CharData_Init(&storage);
1743 XML_SetUserData(g_parser, &storage);
1744 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1745 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1746 if (buffer == NULL)
1747 fail("Could not allocate parse buffer");
1748 assert(buffer != NULL);
1749 memcpy(buffer, text, sizeof(text) - 1);
1750 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1751 xml_failure(g_parser);
1752 CharData_CheckXMLChars(&storage, expected);
1753 }
1754 END_TEST
1755
1756 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1757 START_TEST(test_multichar_cdata_utf16) {
1758 /* Test data is:
1759 * <?xml version='1.0' encoding='utf-16'?>
1760 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1761 *
1762 * where {MINIM} is U+1d15e (a minim or half-note)
1763 * UTF-16: 0xd834 0xdd5e
1764 * UTF-8: 0xf0 0x9d 0x85 0x9e
1765 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1766 * UTF-16: 0xd834 0xdd5f
1767 * UTF-8: 0xf0 0x9d 0x85 0x9f
1768 */
1769 const char text[] = "\0<\0?\0x\0m\0l\0"
1770 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1771 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1772 "1\0"
1773 "6\0'"
1774 "\0?\0>\0\n"
1775 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1776 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1777 "\0]\0]\0>\0<\0/\0a\0>";
1778 #ifdef XML_UNICODE
1779 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1780 #else
1781 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1782 #endif
1783 CharData storage;
1784
1785 CharData_Init(&storage);
1786 XML_SetUserData(g_parser, &storage);
1787 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1788
1789 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1790 == XML_STATUS_ERROR)
1791 xml_failure(g_parser);
1792 CharData_CheckXMLChars(&storage, expected);
1793 }
1794 END_TEST
1795
/* Test that a UTF-16 surrogate pair in the wrong order (low surrogate
 * first) inside a CDATA section is rejected
 */
START_TEST(test_utf16_bad_surrogate_pair)1797 START_TEST(test_utf16_bad_surrogate_pair) {
1798 /* Test data is:
1799 * <?xml version='1.0' encoding='utf-16'?>
1800 * <a><![CDATA[{BADLINB}]]></a>
1801 *
1802 * where {BADLINB} is U+10000 (the first Linear B character)
1803 * with the UTF-16 surrogate pair in the wrong order, i.e.
1804 * 0xdc00 0xd800
1805 */
1806 const char text[] = "\0<\0?\0x\0m\0l\0"
1807 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1808 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1809 "1\0"
1810 "6\0'"
1811 "\0?\0>\0\n"
1812 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1813 "\xdc\x00\xd8\x00"
1814 "\0]\0]\0>\0<\0/\0a\0>";
1815
1816 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1817 != XML_STATUS_ERROR)
1818 fail("Reversed UTF-16 surrogate pair not faulted");
1819 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1820 xml_failure(g_parser);
1821 }
1822 END_TEST
1823
START_TEST(test_bad_cdata)1824 START_TEST(test_bad_cdata) {
1825 struct CaseData {
1826 const char *text;
1827 enum XML_Error expectedError;
1828 };
1829
1830 struct CaseData cases[]
1831 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1832 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1833 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1834 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1835 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1836 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1837 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1838 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1839
1840 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1841 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1842 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1843
1844 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1845 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1846 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1847 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1848 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1849 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1850 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1851
1852 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1853 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1854 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1855
1856 size_t i = 0;
1857 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1858 set_subtest("%s", cases[i].text);
1859 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1860 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1861 const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1862
1863 assert(actualStatus == XML_STATUS_ERROR);
1864
1865 if (actualError != cases[i].expectedError) {
1866 char message[100];
1867 snprintf(message, sizeof(message),
1868 "Expected error %d but got error %d for case %u: \"%s\"\n",
1869 cases[i].expectedError, actualError, (unsigned int)i + 1,
1870 cases[i].text);
1871 fail(message);
1872 }
1873
1874 XML_ParserReset(g_parser, NULL);
1875 }
1876 }
1877 END_TEST
1878
1879 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1880 START_TEST(test_bad_cdata_utf16) {
1881 struct CaseData {
1882 size_t text_bytes;
1883 const char *text;
1884 enum XML_Error expected_error;
1885 };
1886
1887 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1888 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1889 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1890 "1\0"
1891 "6\0'"
1892 "\0?\0>\0\n"
1893 "\0<\0a\0>";
1894 struct CaseData cases[] = {
1895 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1896 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1897 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1898 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1899 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1900 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1901 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1902 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1903 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1904 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1905 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1906 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1907 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1908 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1909 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1910 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1911 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1912 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1913 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1914 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1915 /* Now add a four-byte UTF-16 character */
1916 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1917 XML_ERROR_UNCLOSED_CDATA_SECTION},
1918 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1919 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1920 XML_ERROR_PARTIAL_CHAR},
1921 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1922 XML_ERROR_UNCLOSED_CDATA_SECTION}};
1923 size_t i;
1924
1925 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1926 set_subtest("case %lu", (long unsigned)(i + 1));
1927 enum XML_Status actual_status;
1928 enum XML_Error actual_error;
1929
1930 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1931 XML_FALSE)
1932 == XML_STATUS_ERROR)
1933 xml_failure(g_parser);
1934 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1935 (int)cases[i].text_bytes, XML_TRUE);
1936 assert(actual_status == XML_STATUS_ERROR);
1937 actual_error = XML_GetErrorCode(g_parser);
1938 if (actual_error != cases[i].expected_error) {
1939 char message[1024];
1940
1941 snprintf(message, sizeof(message),
1942 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1943 ") for case %lu\n",
1944 cases[i].expected_error,
1945 XML_ErrorString(cases[i].expected_error), actual_error,
1946 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1947 fail(message);
1948 }
1949 XML_ParserReset(g_parser, NULL);
1950 }
1951 }
1952 END_TEST
1953
1954 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1955 START_TEST(test_stop_parser_between_cdata_calls) {
1956 const char *text = long_cdata_text;
1957
1958 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1959 g_resumable = XML_FALSE;
1960 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1961 }
1962 END_TEST
1963
1964 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1965 START_TEST(test_suspend_parser_between_cdata_calls) {
1966 if (g_chunkSize != 0) {
1967 // this test does not use SINGLE_BYTES, because of suspension
1968 return;
1969 }
1970
1971 const char *text = long_cdata_text;
1972 enum XML_Status result;
1973
1974 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1975 g_resumable = XML_TRUE;
1976 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1977 // we won't know exactly how much input we actually managed to give Expat.
1978 result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
1979 if (result != XML_STATUS_SUSPENDED) {
1980 if (result == XML_STATUS_ERROR)
1981 xml_failure(g_parser);
1982 fail("Parse not suspended in CDATA handler");
1983 }
1984 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1985 xml_failure(g_parser);
1986 }
1987 END_TEST
1988
1989 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1990 START_TEST(test_memory_allocation) {
1991 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1992 char *p;
1993
1994 if (buffer == NULL) {
1995 fail("Allocation failed");
1996 } else {
1997 /* Try writing to memory; some OSes try to cheat! */
1998 buffer[0] = 'T';
1999 buffer[1] = 'E';
2000 buffer[2] = 'S';
2001 buffer[3] = 'T';
2002 buffer[4] = '\0';
2003 if (strcmp(buffer, "TEST") != 0) {
2004 fail("Memory not writable");
2005 } else {
2006 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
2007 if (p == NULL) {
2008 fail("Reallocation failed");
2009 } else {
2010 /* Write again, just to be sure */
2011 buffer = p;
2012 buffer[0] = 'V';
2013 if (strcmp(buffer, "VEST") != 0) {
2014 fail("Reallocated memory not writable");
2015 }
2016 }
2017 }
2018 XML_MemFree(g_parser, buffer);
2019 }
2020 }
2021 END_TEST
2022
2023 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)2024 START_TEST(test_default_current) {
2025 const char *text = "<doc>hell]</doc>";
2026 const char *entity_text = "<!DOCTYPE doc [\n"
2027 "<!ENTITY entity '%'>\n"
2028 "]>\n"
2029 "<doc>&entity;</doc>";
2030
2031 set_subtest("with defaulting");
2032 {
2033 struct handler_record_list storage;
2034 storage.count = 0;
2035 XML_SetDefaultHandler(g_parser, record_default_handler);
2036 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2037 XML_SetUserData(g_parser, &storage);
2038 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2039 == XML_STATUS_ERROR)
2040 xml_failure(g_parser);
2041 int i = 0;
2042 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2043 // we should have gotten one or more cdata callbacks, totaling 5 chars
2044 int cdata_len_remaining = 5;
2045 while (cdata_len_remaining > 0) {
2046 const struct handler_record_entry *c_entry
2047 = handler_record_get(&storage, i++);
2048 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
2049 assert_true(c_entry->arg > 0);
2050 assert_true(c_entry->arg <= cdata_len_remaining);
2051 cdata_len_remaining -= c_entry->arg;
2052 // default handler must follow, with the exact same len argument.
2053 assert_record_handler_called(&storage, i++, "record_default_handler",
2054 c_entry->arg);
2055 }
2056 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2057 assert_true(storage.count == i);
2058 }
2059
2060 /* Again, without the defaulting */
2061 set_subtest("no defaulting");
2062 {
2063 struct handler_record_list storage;
2064 storage.count = 0;
2065 XML_ParserReset(g_parser, NULL);
2066 XML_SetDefaultHandler(g_parser, record_default_handler);
2067 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2068 XML_SetUserData(g_parser, &storage);
2069 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2070 == XML_STATUS_ERROR)
2071 xml_failure(g_parser);
2072 int i = 0;
2073 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2074 // we should have gotten one or more cdata callbacks, totaling 5 chars
2075 int cdata_len_remaining = 5;
2076 while (cdata_len_remaining > 0) {
2077 const struct handler_record_entry *c_entry
2078 = handler_record_get(&storage, i++);
2079 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
2080 assert_true(c_entry->arg > 0);
2081 assert_true(c_entry->arg <= cdata_len_remaining);
2082 cdata_len_remaining -= c_entry->arg;
2083 }
2084 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2085 assert_true(storage.count == i);
2086 }
2087
2088 /* Now with an internal entity to complicate matters */
2089 set_subtest("with internal entity");
2090 {
2091 struct handler_record_list storage;
2092 storage.count = 0;
2093 XML_ParserReset(g_parser, NULL);
2094 XML_SetDefaultHandler(g_parser, record_default_handler);
2095 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2096 XML_SetUserData(g_parser, &storage);
2097 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2098 XML_TRUE)
2099 == XML_STATUS_ERROR)
2100 xml_failure(g_parser);
2101 /* The default handler suppresses the entity */
2102 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2103 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2104 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2105 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2106 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2107 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2108 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2109 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2110 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2111 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2112 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2113 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2114 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2115 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2116 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2117 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2118 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2119 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
2120 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2121 assert_true(storage.count == 19);
2122 }
2123
2124 /* Again, with a skip handler */
2125 set_subtest("with skip handler");
2126 {
2127 struct handler_record_list storage;
2128 storage.count = 0;
2129 XML_ParserReset(g_parser, NULL);
2130 XML_SetDefaultHandler(g_parser, record_default_handler);
2131 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2132 XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
2133 XML_SetUserData(g_parser, &storage);
2134 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2135 XML_TRUE)
2136 == XML_STATUS_ERROR)
2137 xml_failure(g_parser);
2138 /* The default handler suppresses the entity */
2139 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2140 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2141 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2142 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2143 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2144 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2145 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2146 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2147 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2148 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2149 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2150 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2151 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2152 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2153 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2154 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2155 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2156 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2157 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2158 assert_true(storage.count == 19);
2159 }
2160
2161 /* This time, allow the entity through */
2162 set_subtest("allow entity");
2163 {
2164 struct handler_record_list storage;
2165 storage.count = 0;
2166 XML_ParserReset(g_parser, NULL);
2167 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2168 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2169 XML_SetUserData(g_parser, &storage);
2170 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2171 XML_TRUE)
2172 == XML_STATUS_ERROR)
2173 xml_failure(g_parser);
2174 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2175 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2176 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2177 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2178 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2179 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2180 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2181 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2182 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2183 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2184 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2185 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2186 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2187 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2188 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2189 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2190 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2191 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2192 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2193 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2194 assert_true(storage.count == 20);
2195 }
2196
2197 /* Finally, without passing the cdata to the default handler */
2198 set_subtest("not passing cdata");
2199 {
2200 struct handler_record_list storage;
2201 storage.count = 0;
2202 XML_ParserReset(g_parser, NULL);
2203 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2204 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2205 XML_SetUserData(g_parser, &storage);
2206 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2207 XML_TRUE)
2208 == XML_STATUS_ERROR)
2209 xml_failure(g_parser);
2210 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2211 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2212 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2213 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2214 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2215 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2216 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2217 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2218 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2219 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2220 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2221 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2222 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2223 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2224 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2225 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2226 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2227 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2228 1);
2229 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2230 assert_true(storage.count == 19);
2231 }
2232 }
2233 END_TEST
2234
2235 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2236 START_TEST(test_dtd_elements) {
2237 const char *text = "<!DOCTYPE doc [\n"
2238 "<!ELEMENT doc (chapter)>\n"
2239 "<!ELEMENT chapter (#PCDATA)>\n"
2240 "]>\n"
2241 "<doc><chapter>Wombats are go</chapter></doc>";
2242
2243 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2244 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2245 == XML_STATUS_ERROR)
2246 xml_failure(g_parser);
2247 }
2248 END_TEST
2249
2250 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2251 element_decl_check_model(void *userData, const XML_Char *name,
2252 XML_Content *model) {
2253 UNUSED_P(userData);
2254 uint32_t errorFlags = 0;
2255
2256 /* Expected model array structure is this:
2257 * [0] (type 6, quant 0)
2258 * [1] (type 5, quant 0)
2259 * [3] (type 4, quant 0, name "bar")
2260 * [4] (type 4, quant 0, name "foo")
2261 * [5] (type 4, quant 3, name "xyz")
2262 * [2] (type 4, quant 2, name "zebra")
2263 */
2264 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2265 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2266
2267 if (model != NULL) {
2268 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2269 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2270 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2271 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2272 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2273
2274 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2275 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2276 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2277 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2278 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2279
2280 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2281 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2282 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2283 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2284 errorFlags
2285 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2286
2287 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2288 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2289 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2290 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2291 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2292
2293 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2294 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2295 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2296 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2297 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2298
2299 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2300 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2301 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2302 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2303 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2304 }
2305
2306 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2307 XML_FreeContentModel(g_parser, model);
2308 }
2309
START_TEST(test_dtd_elements_nesting)2310 START_TEST(test_dtd_elements_nesting) {
2311 // Payload inspired by a test in Perl's XML::Parser
2312 const char *text = "<!DOCTYPE foo [\n"
2313 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2314 "]>\n"
2315 "<foo/>";
2316
2317 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2318
2319 XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2320 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2321 == XML_STATUS_ERROR)
2322 xml_failure(g_parser);
2323
2324 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2325 fail("Element declaration model regression detected");
2326 }
2327 END_TEST
2328
2329 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2330 START_TEST(test_set_foreign_dtd) {
2331 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2332 const char *text2 = "<doc>&entity;</doc>";
2333 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2334
2335 /* Check hash salt is passed through too */
2336 XML_SetHashSalt(g_parser, 0x12345678);
2337 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2338 XML_SetUserData(g_parser, &test_data);
2339 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2340 /* Add a default handler to exercise more code paths */
2341 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2342 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2343 fail("Could not set foreign DTD");
2344 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2345 == XML_STATUS_ERROR)
2346 xml_failure(g_parser);
2347
2348 /* Ensure that trying to set the DTD after parsing has started
2349 * is faulted, even if it's the same setting.
2350 */
2351 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2352 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2353 fail("Failed to reject late foreign DTD setting");
2354 /* Ditto for the hash salt */
2355 if (XML_SetHashSalt(g_parser, 0x23456789))
2356 fail("Failed to reject late hash salt change");
2357
2358 /* Now finish the parse */
2359 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2360 == XML_STATUS_ERROR)
2361 xml_failure(g_parser);
2362 }
2363 END_TEST
2364
2365 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2366 START_TEST(test_foreign_dtd_not_standalone) {
2367 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2368 "<doc>&entity;</doc>";
2369 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2370
2371 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2372 XML_SetUserData(g_parser, &test_data);
2373 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2374 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2375 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2376 fail("Could not set foreign DTD");
2377 expect_failure(text, XML_ERROR_NOT_STANDALONE,
2378 "NotStandalonehandler failed to reject");
2379 }
2380 END_TEST
2381
2382 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2383 START_TEST(test_invalid_foreign_dtd) {
2384 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2385 "<doc>&entity;</doc>";
2386 ExtFaults test_data
2387 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2388
2389 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2390 XML_SetUserData(g_parser, &test_data);
2391 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2392 XML_UseForeignDTD(g_parser, XML_TRUE);
2393 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2394 "Bad DTD should not have been accepted");
2395 }
2396 END_TEST
2397
2398 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2399 START_TEST(test_foreign_dtd_with_doctype) {
2400 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2401 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2402 const char *text2 = "<doc>&entity;</doc>";
2403 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2404
2405 /* Check hash salt is passed through too */
2406 XML_SetHashSalt(g_parser, 0x12345678);
2407 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2408 XML_SetUserData(g_parser, &test_data);
2409 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2410 /* Add a default handler to exercise more code paths */
2411 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2412 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2413 fail("Could not set foreign DTD");
2414 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2415 == XML_STATUS_ERROR)
2416 xml_failure(g_parser);
2417
2418 /* Ensure that trying to set the DTD after parsing has started
2419 * is faulted, even if it's the same setting.
2420 */
2421 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2422 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2423 fail("Failed to reject late foreign DTD setting");
2424 /* Ditto for the hash salt */
2425 if (XML_SetHashSalt(g_parser, 0x23456789))
2426 fail("Failed to reject late hash salt change");
2427
2428 /* Now finish the parse */
2429 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2430 == XML_STATUS_ERROR)
2431 xml_failure(g_parser);
2432 }
2433 END_TEST
2434
2435 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2436 START_TEST(test_foreign_dtd_without_external_subset) {
2437 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2438 "<doc>&foo;</doc>";
2439
2440 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2441 XML_SetUserData(g_parser, NULL);
2442 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2443 XML_UseForeignDTD(g_parser, XML_TRUE);
2444 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2445 == XML_STATUS_ERROR)
2446 xml_failure(g_parser);
2447 }
2448 END_TEST
2449
START_TEST(test_empty_foreign_dtd)2450 START_TEST(test_empty_foreign_dtd) {
2451 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2452 "<doc>&entity;</doc>";
2453
2454 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2455 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2456 XML_UseForeignDTD(g_parser, XML_TRUE);
2457 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2458 "Undefined entity not faulted");
2459 }
2460 END_TEST
2461
2462 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2463 START_TEST(test_set_base) {
2464 const XML_Char *old_base;
2465 const XML_Char *new_base = XCS("/local/file/name.xml");
2466
2467 old_base = XML_GetBase(g_parser);
2468 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2469 fail("Unable to set base");
2470 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2471 fail("Base setting not correct");
2472 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2473 fail("Unable to NULL base");
2474 if (XML_GetBase(g_parser) != NULL)
2475 fail("Base setting not nulled");
2476 XML_SetBase(g_parser, old_base);
2477 }
2478 END_TEST
2479
2480 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2481 START_TEST(test_attributes) {
2482 const char *text = "<!DOCTYPE doc [\n"
2483 "<!ELEMENT doc (tag)>\n"
2484 "<!ATTLIST doc id ID #REQUIRED>\n"
2485 "]>"
2486 "<doc a='1' id='one' b='2'>"
2487 "<tag c='3'/>"
2488 "</doc>";
2489 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2490 {XCS("b"), XCS("2")},
2491 {XCS("id"), XCS("one")},
2492 {NULL, NULL}};
2493 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2494 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2495 {XCS("tag"), 1, NULL, NULL},
2496 {NULL, 0, NULL, NULL}};
2497 info[0].attributes = doc_info;
2498 info[1].attributes = tag_info;
2499
2500 XML_Parser parser = XML_ParserCreate(NULL);
2501 assert_true(parser != NULL);
2502 ParserAndElementInfo parserAndElementInfos = {
2503 parser,
2504 info,
2505 };
2506
2507 XML_SetStartElementHandler(parser, counting_start_element_handler);
2508 XML_SetUserData(parser, &parserAndElementInfos);
2509 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2510 == XML_STATUS_ERROR)
2511 xml_failure(parser);
2512
2513 XML_ParserFree(parser);
2514 }
2515 END_TEST
2516
2517 /* Test reset works correctly in the middle of processing an internal
2518 * entity. Exercises some obscure code in XML_ParserReset().
2519 */
START_TEST(test_reset_in_entity)2520 START_TEST(test_reset_in_entity) {
2521 if (g_chunkSize != 0) {
2522 // this test does not use SINGLE_BYTES, because of suspension
2523 return;
2524 }
2525
2526 const char *text = "<!DOCTYPE doc [\n"
2527 "<!ENTITY wombat 'wom'>\n"
2528 "<!ENTITY entity 'hi &wom; there'>\n"
2529 "]>\n"
2530 "<doc>&entity;</doc>";
2531 XML_ParsingStatus status;
2532
2533 g_resumable = XML_TRUE;
2534 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2535 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
2536 // we won't know exactly how much input we actually managed to give Expat.
2537 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2538 == XML_STATUS_ERROR)
2539 xml_failure(g_parser);
2540 XML_GetParsingStatus(g_parser, &status);
2541 if (status.parsing != XML_SUSPENDED)
2542 fail("Parsing status not SUSPENDED");
2543 XML_ParserReset(g_parser, NULL);
2544 XML_GetParsingStatus(g_parser, &status);
2545 if (status.parsing != XML_INITIALIZED)
2546 fail("Parsing status doesn't reset to INITIALIZED");
2547 }
2548 END_TEST
2549
2550 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2551 START_TEST(test_resume_invalid_parse) {
2552 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2553
2554 g_resumable = XML_TRUE;
2555 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2556 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2557 == XML_STATUS_ERROR)
2558 xml_failure(g_parser);
2559 if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2560 fail("Resumed invalid parse not faulted");
2561 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2562 fail("Invalid parse not correctly faulted");
2563 }
2564 END_TEST
2565
2566 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2567 START_TEST(test_resume_resuspended) {
2568 const char *text = "<doc>Hello<meep/>world</doc>";
2569
2570 g_resumable = XML_TRUE;
2571 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2572 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2573 == XML_STATUS_ERROR)
2574 xml_failure(g_parser);
2575 g_resumable = XML_TRUE;
2576 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2577 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2578 fail("Resumption not suspended");
2579 /* This one should succeed and finish up */
2580 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2581 xml_failure(g_parser);
2582 }
2583 END_TEST
2584
2585 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2586 START_TEST(test_cdata_default) {
2587 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2588 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2589 CharData storage;
2590
2591 CharData_Init(&storage);
2592 XML_SetUserData(g_parser, &storage);
2593 XML_SetDefaultHandler(g_parser, accumulate_characters);
2594
2595 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2596 == XML_STATUS_ERROR)
2597 xml_failure(g_parser);
2598 CharData_CheckXMLChars(&storage, expected);
2599 }
2600 END_TEST
2601
2602 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2603 START_TEST(test_subordinate_reset) {
2604 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2605 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2606 "<doc>&entity;</doc>";
2607
2608 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2609 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2610 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2611 == XML_STATUS_ERROR)
2612 xml_failure(g_parser);
2613 }
2614 END_TEST
2615
2616 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2617 START_TEST(test_subordinate_suspend) {
2618 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2619 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2620 "<doc>&entity;</doc>";
2621
2622 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2623 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2624 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2625 == XML_STATUS_ERROR)
2626 xml_failure(g_parser);
2627 }
2628 END_TEST
2629
2630 /* Test suspending a subordinate parser from an XML declaration */
2631 /* Increases code coverage of the tests */
2632
START_TEST(test_subordinate_xdecl_suspend)2633 START_TEST(test_subordinate_xdecl_suspend) {
2634 const char *text
2635 = "<!DOCTYPE doc [\n"
2636 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2637 "]>\n"
2638 "<doc>&entity;</doc>";
2639
2640 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2641 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2642 g_resumable = XML_TRUE;
2643 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2644 == XML_STATUS_ERROR)
2645 xml_failure(g_parser);
2646 }
2647 END_TEST
2648
START_TEST(test_subordinate_xdecl_abort)2649 START_TEST(test_subordinate_xdecl_abort) {
2650 const char *text
2651 = "<!DOCTYPE doc [\n"
2652 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2653 "]>\n"
2654 "<doc>&entity;</doc>";
2655
2656 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2657 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2658 g_resumable = XML_FALSE;
2659 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2660 == XML_STATUS_ERROR)
2661 xml_failure(g_parser);
2662 }
2663 END_TEST
2664
2665 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2666 START_TEST(test_ext_entity_invalid_suspended_parse) {
2667 const char *text = "<!DOCTYPE doc [\n"
2668 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2669 "]>\n"
2670 "<doc>&en;</doc>";
2671 ExtFaults faults[]
2672 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2673 "Incomplete element declaration not faulted", NULL,
2674 XML_ERROR_UNCLOSED_TOKEN},
2675 {/* First two bytes of a three-byte char */
2676 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2677 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2678 {NULL, NULL, NULL, XML_ERROR_NONE}};
2679 ExtFaults *fault;
2680
2681 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2682 set_subtest("%s", fault->parse_text);
2683 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2684 XML_SetExternalEntityRefHandler(g_parser,
2685 external_entity_suspending_faulter);
2686 XML_SetUserData(g_parser, fault);
2687 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2688 "Parser did not report external entity error");
2689 XML_ParserReset(g_parser, NULL);
2690 }
2691 }
2692 END_TEST
2693
2694 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2695 START_TEST(test_explicit_encoding) {
2696 const char *text1 = "<doc>Hello ";
2697 const char *text2 = " World</doc>";
2698
2699 /* Just check that we can set the encoding to NULL before starting */
2700 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2701 fail("Failed to initialise encoding to NULL");
2702 /* Say we are UTF-8 */
2703 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2704 fail("Failed to set explicit encoding");
2705 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2706 == XML_STATUS_ERROR)
2707 xml_failure(g_parser);
2708 /* Try to switch encodings mid-parse */
2709 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2710 fail("Allowed encoding change");
2711 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2712 == XML_STATUS_ERROR)
2713 xml_failure(g_parser);
2714 /* Try now the parse is over */
2715 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2716 fail("Failed to unset encoding");
2717 }
2718 END_TEST
2719
2720 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2721 START_TEST(test_trailing_cr) {
2722 const char *text = "<doc>\r";
2723 int found_cr;
2724
2725 /* Try with a character handler, for code coverage */
2726 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2727 XML_SetUserData(g_parser, &found_cr);
2728 found_cr = 0;
2729 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2730 == XML_STATUS_OK)
2731 fail("Failed to fault unclosed doc");
2732 if (found_cr == 0)
2733 fail("Did not catch the carriage return");
2734 XML_ParserReset(g_parser, NULL);
2735
2736 /* Now with a default handler instead */
2737 XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2738 XML_SetUserData(g_parser, &found_cr);
2739 found_cr = 0;
2740 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2741 == XML_STATUS_OK)
2742 fail("Failed to fault unclosed doc");
2743 if (found_cr == 0)
2744 fail("Did not catch default carriage return");
2745 }
2746 END_TEST
2747
2748 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2749 START_TEST(test_ext_entity_trailing_cr) {
2750 const char *text = "<!DOCTYPE doc [\n"
2751 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2752 "]>\n"
2753 "<doc>&en;</doc>";
2754 int found_cr;
2755
2756 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2757 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2758 XML_SetUserData(g_parser, &found_cr);
2759 found_cr = 0;
2760 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2761 != XML_STATUS_OK)
2762 xml_failure(g_parser);
2763 if (found_cr == 0)
2764 fail("No carriage return found");
2765 XML_ParserReset(g_parser, NULL);
2766
2767 /* Try again with a different trailing CR */
2768 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2769 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2770 XML_SetUserData(g_parser, &found_cr);
2771 found_cr = 0;
2772 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2773 != XML_STATUS_OK)
2774 xml_failure(g_parser);
2775 if (found_cr == 0)
2776 fail("No carriage return found");
2777 }
2778 END_TEST
2779
2780 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2781 START_TEST(test_trailing_rsqb) {
2782 const char *text8 = "<doc>]";
2783 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2784 int found_rsqb;
2785 int text8_len = (int)strlen(text8);
2786
2787 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2788 XML_SetUserData(g_parser, &found_rsqb);
2789 found_rsqb = 0;
2790 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2791 == XML_STATUS_OK)
2792 fail("Failed to fault unclosed doc");
2793 if (found_rsqb == 0)
2794 fail("Did not catch the right square bracket");
2795
2796 /* Try again with a different encoding */
2797 XML_ParserReset(g_parser, NULL);
2798 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2799 XML_SetUserData(g_parser, &found_rsqb);
2800 found_rsqb = 0;
2801 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2802 XML_TRUE)
2803 == XML_STATUS_OK)
2804 fail("Failed to fault unclosed doc");
2805 if (found_rsqb == 0)
2806 fail("Did not catch the right square bracket");
2807
2808 /* And finally with a default handler */
2809 XML_ParserReset(g_parser, NULL);
2810 XML_SetDefaultHandler(g_parser, rsqb_handler);
2811 XML_SetUserData(g_parser, &found_rsqb);
2812 found_rsqb = 0;
2813 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2814 XML_TRUE)
2815 == XML_STATUS_OK)
2816 fail("Failed to fault unclosed doc");
2817 if (found_rsqb == 0)
2818 fail("Did not catch the right square bracket");
2819 }
2820 END_TEST
2821
2822 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2823 START_TEST(test_ext_entity_trailing_rsqb) {
2824 const char *text = "<!DOCTYPE doc [\n"
2825 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2826 "]>\n"
2827 "<doc>&en;</doc>";
2828 int found_rsqb;
2829
2830 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2831 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2832 XML_SetUserData(g_parser, &found_rsqb);
2833 found_rsqb = 0;
2834 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2835 != XML_STATUS_OK)
2836 xml_failure(g_parser);
2837 if (found_rsqb == 0)
2838 fail("No right square bracket found");
2839 }
2840 END_TEST
2841
2842 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2843 START_TEST(test_ext_entity_good_cdata) {
2844 const char *text = "<!DOCTYPE doc [\n"
2845 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2846 "]>\n"
2847 "<doc>&en;</doc>";
2848
2849 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2850 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2851 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2852 != XML_STATUS_OK)
2853 xml_failure(g_parser);
2854 }
2855 END_TEST
2856
2857 /* Test user parameter settings */
START_TEST(test_user_parameters)2858 START_TEST(test_user_parameters) {
2859 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2860 "<!-- Primary parse -->\n"
2861 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2862 "<doc>&entity;";
2863 const char *epilog = "<!-- Back to primary parser -->\n"
2864 "</doc>";
2865
2866 g_comment_count = 0;
2867 g_skip_count = 0;
2868 g_xdecl_count = 0;
2869 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2870 XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2871 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2872 XML_SetCommentHandler(g_parser, data_check_comment_handler);
2873 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2874 XML_UseParserAsHandlerArg(g_parser);
2875 XML_SetUserData(g_parser, (void *)1);
2876 g_handler_data = g_parser;
2877 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2878 == XML_STATUS_ERROR)
2879 xml_failure(g_parser);
2880 /* Ensure we can't change policy mid-parse */
2881 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2882 fail("Changed param entity parsing policy while parsing");
2883 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2884 == XML_STATUS_ERROR)
2885 xml_failure(g_parser);
2886 if (g_comment_count != 3)
2887 fail("Comment handler not invoked enough times");
2888 if (g_skip_count != 1)
2889 fail("Skip handler not invoked enough times");
2890 if (g_xdecl_count != 1)
2891 fail("XML declaration handler not invoked");
2892 }
2893 END_TEST
2894
/* Test that an explicit external entity handler argument replaces
 * the parser as the first argument.
 *
 * We do not call the first parameter to the external entity handler
 * 'parser' for once, since the first time the handler is called it
 * will actually be a text string. We need to be able to access the
 * global 'g_parser' variable to create our external entity parser from,
 * since there are code paths we need to ensure get executed.
 */
START_TEST(test_ext_entity_ref_parameter)2904 START_TEST(test_ext_entity_ref_parameter) {
2905 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2906 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2907 "<doc>&entity;</doc>";
2908
2909 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2910 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2911 /* Set a handler arg that is not NULL and not parser (which is
2912 * what NULL would cause to be passed.
2913 */
2914 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2915 g_handler_data = text;
2916 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2917 == XML_STATUS_ERROR)
2918 xml_failure(g_parser);
2919
2920 /* Now try again with unset args */
2921 XML_ParserReset(g_parser, NULL);
2922 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2923 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2924 XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2925 g_handler_data = g_parser;
2926 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2927 == XML_STATUS_ERROR)
2928 xml_failure(g_parser);
2929 }
2930 END_TEST
2931
2932 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2933 START_TEST(test_empty_parse) {
2934 const char *text = "<doc></doc>";
2935 const char *partial = "<doc>";
2936
2937 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2938 fail("Parsing empty string faulted");
2939 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2940 fail("Parsing final empty string not faulted");
2941 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2942 fail("Parsing final empty string faulted for wrong reason");
2943
2944 /* Now try with valid text before the empty end */
2945 XML_ParserReset(g_parser, NULL);
2946 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2947 == XML_STATUS_ERROR)
2948 xml_failure(g_parser);
2949 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2950 fail("Parsing final empty string faulted");
2951
2952 /* Now try with invalid text before the empty end */
2953 XML_ParserReset(g_parser, NULL);
2954 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2955 XML_FALSE)
2956 == XML_STATUS_ERROR)
2957 xml_failure(g_parser);
2958 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2959 fail("Parsing final incomplete empty string not faulted");
2960 }
2961 END_TEST
2962
2963 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2964 START_TEST(test_negative_len_parse) {
2965 const char *const doc = "<root/>";
2966 for (int isFinal = 0; isFinal < 2; isFinal++) {
2967 set_subtest("isFinal=%d", isFinal);
2968
2969 XML_Parser parser = XML_ParserCreate(NULL);
2970
2971 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2972 fail("There was not supposed to be any initial parse error.");
2973
2974 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2975
2976 if (status != XML_STATUS_ERROR)
2977 fail("Negative len was expected to fail the parse but did not.");
2978
2979 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2980 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2981
2982 XML_ParserFree(parser);
2983 }
2984 }
2985 END_TEST
2986
2987 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2988 START_TEST(test_negative_len_parse_buffer) {
2989 const char *const doc = "<root/>";
2990 for (int isFinal = 0; isFinal < 2; isFinal++) {
2991 set_subtest("isFinal=%d", isFinal);
2992
2993 XML_Parser parser = XML_ParserCreate(NULL);
2994
2995 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2996 fail("There was not supposed to be any initial parse error.");
2997
2998 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2999
3000 if (buffer == NULL)
3001 fail("XML_GetBuffer failed.");
3002
3003 memcpy(buffer, doc, strlen(doc));
3004
3005 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
3006
3007 if (status != XML_STATUS_ERROR)
3008 fail("Negative len was expected to fail the parse but did not.");
3009
3010 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3011 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3012
3013 XML_ParserFree(parser);
3014 }
3015 }
3016 END_TEST
3017
/* Helper: look up the value of a feature in XML_GetFeatureList() */
3019 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)3020 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
3021 const XML_Feature *feature = XML_GetFeatureList();
3022
3023 if (feature == NULL)
3024 return XML_STATUS_ERROR;
3025 for (; feature->feature != XML_FEATURE_END; feature++) {
3026 if (feature->feature == feature_id) {
3027 *presult = feature->value;
3028 return XML_STATUS_OK;
3029 }
3030 }
3031 return XML_STATUS_ERROR;
3032 }
3033
3034 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)3035 START_TEST(test_get_buffer_1) {
3036 const char *text = get_buffer_test_text;
3037 void *buffer;
3038 long context_bytes;
3039
3040 /* Attempt to allocate a negative length buffer */
3041 if (XML_GetBuffer(g_parser, -12) != NULL)
3042 fail("Negative length buffer not failed");
3043
3044 /* Now get a small buffer and extend it past valid length */
3045 buffer = XML_GetBuffer(g_parser, 1536);
3046 if (buffer == NULL)
3047 fail("1.5K buffer failed");
3048 assert(buffer != NULL);
3049 memcpy(buffer, text, strlen(text));
3050 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3051 == XML_STATUS_ERROR)
3052 xml_failure(g_parser);
3053 if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
3054 fail("INT_MAX buffer not failed");
3055
3056 /* Now try extending it a more reasonable but still too large
3057 * amount. The allocator in XML_GetBuffer() doubles the buffer
3058 * size until it exceeds the requested amount or INT_MAX. If it
3059 * exceeds INT_MAX, it rejects the request, so we want a request
3060 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
3061 * with an extra byte just to ensure that the request is off any
3062 * boundary. The request will be inflated internally by
3063 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
3064 * request.
3065 */
3066 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
3067 context_bytes = 0;
3068 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
3069 fail("INT_MAX- buffer not failed");
3070
3071 /* Now try extending it a carefully crafted amount */
3072 if (XML_GetBuffer(g_parser, 1000) == NULL)
3073 fail("1000 buffer failed");
3074 }
3075 END_TEST
3076
3077 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)3078 START_TEST(test_get_buffer_2) {
3079 const char *text = get_buffer_test_text;
3080 void *buffer;
3081
3082 /* Now get a decent buffer */
3083 buffer = XML_GetBuffer(g_parser, 1536);
3084 if (buffer == NULL)
3085 fail("1.5K buffer failed");
3086 assert(buffer != NULL);
3087 memcpy(buffer, text, strlen(text));
3088 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3089 == XML_STATUS_ERROR)
3090 xml_failure(g_parser);
3091
3092 /* Extend it, to catch a different code path */
3093 if (XML_GetBuffer(g_parser, 1024) == NULL)
3094 fail("1024 buffer failed");
3095 }
3096 END_TEST
3097
3098 /* Test for signed integer overflow CVE-2022-23852 */
3099 #if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow)3100 START_TEST(test_get_buffer_3_overflow) {
3101 XML_Parser parser = XML_ParserCreate(NULL);
3102 assert(parser != NULL);
3103
3104 const char *const text = "\n";
3105 const int expectedKeepValue = (int)strlen(text);
3106
3107 // After this call, variable "keep" in XML_GetBuffer will
3108 // have value expectedKeepValue
3109 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
3110 XML_FALSE /* isFinal */)
3111 == XML_STATUS_ERROR)
3112 xml_failure(parser);
3113
3114 assert(expectedKeepValue > 0);
3115 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
3116 fail("enlarging buffer not failed");
3117
3118 XML_ParserFree(parser);
3119 }
3120 END_TEST
3121 #endif // XML_CONTEXT_BYTES > 0
3122
START_TEST(test_buffer_can_grow_to_max)3123 START_TEST(test_buffer_can_grow_to_max) {
3124 const char *const prefixes[] = {
3125 "",
3126 "<",
3127 "<x a='",
3128 "<doc><x a='",
3129 "<document><x a='",
3130 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
3131 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
3132 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
3133 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
3134 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
3135 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
3136 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
3137 #if defined(__MINGW32__) && ! defined(__MINGW64__)
3138 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
3139 // Can we make a big allocation?
3140 for (int i = 1; i <= 2; i++) {
3141 void *const big = malloc(maxbuf);
3142 if (big != NULL) {
3143 free(big);
3144 break;
3145 }
3146 // The big allocation failed. Let's be a little lenient.
3147 maxbuf = maxbuf / 2;
3148 fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf);
3149 }
3150 #endif
3151
3152 for (int i = 0; i < num_prefixes; ++i) {
3153 set_subtest("\"%s\"", prefixes[i]);
3154 XML_Parser parser = XML_ParserCreate(NULL);
3155 #if XML_GE == 1
3156 assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
3157 == XML_TRUE); // i.e. deactivate
3158 #endif
3159 const int prefix_len = (int)strlen(prefixes[i]);
3160 const enum XML_Status s
3161 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3162 if (s != XML_STATUS_OK)
3163 xml_failure(parser);
3164
3165 // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3166 // subtracting the whole prefix is easiest, and close enough.
3167 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3168 // The limit should be consistent; no prefix should allow us to
3169 // reach above the max buffer size.
3170 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3171 XML_ParserFree(parser);
3172 }
3173 }
3174 END_TEST
3175
START_TEST(test_getbuffer_allocates_on_zero_len)3176 START_TEST(test_getbuffer_allocates_on_zero_len) {
3177 for (int first_len = 1; first_len >= 0; first_len--) {
3178 set_subtest("with len=%d first", first_len);
3179 XML_Parser parser = XML_ParserCreate(NULL);
3180 assert_true(parser != NULL);
3181 assert_true(XML_GetBuffer(parser, first_len) != NULL);
3182 assert_true(XML_GetBuffer(parser, 0) != NULL);
3183 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3184 xml_failure(parser);
3185 XML_ParserFree(parser);
3186 }
3187 }
3188 END_TEST
3189
3190 /* Test position information macros */
START_TEST(test_byte_info_at_end)3191 START_TEST(test_byte_info_at_end) {
3192 const char *text = "<doc></doc>";
3193
3194 if (XML_GetCurrentByteIndex(g_parser) != -1
3195 || XML_GetCurrentByteCount(g_parser) != 0)
3196 fail("Byte index/count incorrect at start of parse");
3197 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3198 == XML_STATUS_ERROR)
3199 xml_failure(g_parser);
3200 /* At end, the count will be zero and the index the end of string */
3201 if (XML_GetCurrentByteCount(g_parser) != 0)
3202 fail("Terminal byte count incorrect");
3203 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3204 fail("Terminal byte index incorrect");
3205 }
3206 END_TEST
3207
3208 /* Test position information from errors */
3209 #define PRE_ERROR_STR "<doc></"
3210 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3211 START_TEST(test_byte_info_at_error) {
3212 const char *text = PRE_ERROR_STR POST_ERROR_STR;
3213
3214 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3215 == XML_STATUS_OK)
3216 fail("Syntax error not faulted");
3217 if (XML_GetCurrentByteCount(g_parser) != 0)
3218 fail("Error byte count incorrect");
3219 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3220 fail("Error byte index incorrect");
3221 }
3222 END_TEST
3223 #undef PRE_ERROR_STR
3224 #undef POST_ERROR_STR
3225
3226 /* Test position information in handler */
3227 #define START_ELEMENT "<e>"
3228 #define CDATA_TEXT "Hello"
3229 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3230 START_TEST(test_byte_info_at_cdata) {
3231 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3232 int offset, size;
3233 ByteTestData data;
3234
3235 /* Check initial context is empty */
3236 if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3237 fail("Unexpected context at start of parse");
3238
3239 data.start_element_len = (int)strlen(START_ELEMENT);
3240 data.cdata_len = (int)strlen(CDATA_TEXT);
3241 data.total_string_len = (int)strlen(text);
3242 XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3243 XML_SetUserData(g_parser, &data);
3244 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3245 xml_failure(g_parser);
3246 }
3247 END_TEST
3248 #undef START_ELEMENT
3249 #undef CDATA_TEXT
3250 #undef END_ELEMENT
3251
3252 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3253 START_TEST(test_predefined_entities) {
3254 const char *text = "<doc><>&"'</doc>";
3255 const XML_Char *expected = XCS("<doc><>&"'</doc>");
3256 const XML_Char *result = XCS("<>&\"'");
3257 CharData storage;
3258
3259 XML_SetDefaultHandler(g_parser, accumulate_characters);
3260 /* run_character_check uses XML_SetCharacterDataHandler(), which
3261 * unfortunately heads off a code path that we need to exercise.
3262 */
3263 CharData_Init(&storage);
3264 XML_SetUserData(g_parser, &storage);
3265 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3266 == XML_STATUS_ERROR)
3267 xml_failure(g_parser);
3268 /* The default handler doesn't translate the entities */
3269 CharData_CheckXMLChars(&storage, expected);
3270
3271 /* Now try again and check the translation */
3272 XML_ParserReset(g_parser, NULL);
3273 run_character_check(text, result);
3274 }
3275 END_TEST
3276
3277 /* Regression test that an invalid tag in an external parameter
3278 * reference in an external DTD is correctly faulted.
3279 *
3280 * Only a few specific tags are legal in DTDs ignoring comments and
3281 * processing instructions, all of which begin with an exclamation
3282 * mark. "<el/>" is not one of them, so the parser should raise an
3283 * error on encountering it.
3284 */
START_TEST(test_invalid_tag_in_dtd)3285 START_TEST(test_invalid_tag_in_dtd) {
3286 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3287 "<doc></doc>\n";
3288
3289 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3290 XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3291 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3292 "Invalid tag IN DTD external param not rejected");
3293 }
3294 END_TEST
3295
3296 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3297 START_TEST(test_not_predefined_entities) {
3298 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3299 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3300 int i = 0;
3301
3302 while (text[i] != NULL) {
3303 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3304 "Undefined entity not rejected");
3305 XML_ParserReset(g_parser, NULL);
3306 i++;
3307 }
3308 }
3309 END_TEST
3310
3311 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3312 START_TEST(test_ignore_section) {
3313 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3314 "<doc><e>&entity;</e></doc>";
3315 const XML_Char *expected
3316 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3317 CharData storage;
3318
3319 CharData_Init(&storage);
3320 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3321 XML_SetUserData(g_parser, &storage);
3322 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3323 XML_SetDefaultHandler(g_parser, accumulate_characters);
3324 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3325 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3326 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3327 XML_SetStartElementHandler(g_parser, dummy_start_element);
3328 XML_SetEndElementHandler(g_parser, dummy_end_element);
3329 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3330 == XML_STATUS_ERROR)
3331 xml_failure(g_parser);
3332 CharData_CheckXMLChars(&storage, expected);
3333 }
3334 END_TEST
3335
START_TEST(test_ignore_section_utf16)3336 START_TEST(test_ignore_section_utf16) {
3337 const char text[] =
3338 /* <!DOCTYPE d SYSTEM 's'> */
3339 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3340 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3341 /* <d><e>&en;</e></d> */
3342 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3343 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3344 CharData storage;
3345
3346 CharData_Init(&storage);
3347 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3348 XML_SetUserData(g_parser, &storage);
3349 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3350 XML_SetDefaultHandler(g_parser, accumulate_characters);
3351 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3352 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3353 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3354 XML_SetStartElementHandler(g_parser, dummy_start_element);
3355 XML_SetEndElementHandler(g_parser, dummy_end_element);
3356 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3357 == XML_STATUS_ERROR)
3358 xml_failure(g_parser);
3359 CharData_CheckXMLChars(&storage, expected);
3360 }
3361 END_TEST
3362
START_TEST(test_ignore_section_utf16_be)3363 START_TEST(test_ignore_section_utf16_be) {
3364 const char text[] =
3365 /* <!DOCTYPE d SYSTEM 's'> */
3366 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3367 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3368 /* <d><e>&en;</e></d> */
3369 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3370 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3371 CharData storage;
3372
3373 CharData_Init(&storage);
3374 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3375 XML_SetUserData(g_parser, &storage);
3376 XML_SetExternalEntityRefHandler(g_parser,
3377 external_entity_load_ignore_utf16_be);
3378 XML_SetDefaultHandler(g_parser, accumulate_characters);
3379 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3380 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3381 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3382 XML_SetStartElementHandler(g_parser, dummy_start_element);
3383 XML_SetEndElementHandler(g_parser, dummy_end_element);
3384 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3385 == XML_STATUS_ERROR)
3386 xml_failure(g_parser);
3387 CharData_CheckXMLChars(&storage, expected);
3388 }
3389 END_TEST
3390
3391 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3392 START_TEST(test_bad_ignore_section) {
3393 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3394 "<doc><e>&entity;</e></doc>";
3395 ExtFaults faults[]
3396 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3397 XML_ERROR_SYNTAX},
3398 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3399 XML_ERROR_INVALID_TOKEN},
3400 {/* FIrst two bytes of a three-byte char */
3401 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3402 XML_ERROR_PARTIAL_CHAR},
3403 {NULL, NULL, NULL, XML_ERROR_NONE}};
3404 ExtFaults *fault;
3405
3406 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3407 set_subtest("%s", fault->parse_text);
3408 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3409 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3410 XML_SetUserData(g_parser, fault);
3411 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3412 "Incomplete IGNORE section not failed");
3413 XML_ParserReset(g_parser, NULL);
3414 }
3415 }
3416 END_TEST
3417
3418 struct bom_testdata {
3419 const char *external;
3420 int split;
3421 XML_Bool nested_callback_happened;
3422 };
3423
3424 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3425 external_bom_checker(XML_Parser parser, const XML_Char *context,
3426 const XML_Char *base, const XML_Char *systemId,
3427 const XML_Char *publicId) {
3428 const char *text;
3429 UNUSED_P(base);
3430 UNUSED_P(systemId);
3431 UNUSED_P(publicId);
3432
3433 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3434 if (ext_parser == NULL)
3435 fail("Could not create external entity parser");
3436
3437 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3438 struct bom_testdata *const testdata
3439 = (struct bom_testdata *)XML_GetUserData(parser);
3440 const char *const external = testdata->external;
3441 const int split = testdata->split;
3442 testdata->nested_callback_happened = XML_TRUE;
3443
3444 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3445 != XML_STATUS_OK) {
3446 xml_failure(ext_parser);
3447 }
3448 text = external + split; // the parse below will continue where we left off.
3449 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3450 text = "<!ELEMENT doc EMPTY>\n"
3451 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3452 "<!ENTITY % e2 '%e1;'>\n";
3453 } else {
3454 fail("unknown systemId");
3455 }
3456
3457 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3458 != XML_STATUS_OK)
3459 xml_failure(ext_parser);
3460
3461 XML_ParserFree(ext_parser);
3462 return XML_STATUS_OK;
3463 }
3464
3465 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3466 START_TEST(test_external_bom_consumed) {
3467 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3468 "<doc></doc>\n";
3469 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3470 const int len = (int)strlen(external);
3471 for (int split = 0; split <= len; ++split) {
3472 set_subtest("split at byte %d", split);
3473
3474 struct bom_testdata testdata;
3475 testdata.external = external;
3476 testdata.split = split;
3477 testdata.nested_callback_happened = XML_FALSE;
3478
3479 XML_Parser parser = XML_ParserCreate(NULL);
3480 if (parser == NULL) {
3481 fail("Couldn't create parser");
3482 }
3483 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3484 XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3485 XML_SetUserData(parser, &testdata);
3486 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3487 == XML_STATUS_ERROR)
3488 xml_failure(parser);
3489 if (! testdata.nested_callback_happened) {
3490 fail("ref handler not called");
3491 }
3492 XML_ParserFree(parser);
3493 }
3494 }
3495 END_TEST
3496
3497 /* Test recursive parsing */
START_TEST(test_external_entity_values)3498 START_TEST(test_external_entity_values) {
3499 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3500 "<doc></doc>\n";
3501 ExtFaults data_004_2[] = {
3502 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3503 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3504 XML_ERROR_INVALID_TOKEN},
3505 {"'wombat", "Unterminated string not faulted", NULL,
3506 XML_ERROR_UNCLOSED_TOKEN},
3507 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3508 XML_ERROR_PARTIAL_CHAR},
3509 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3510 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3511 XML_ERROR_XML_DECL},
3512 {/* UTF-8 BOM */
3513 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3514 XML_ERROR_NONE},
3515 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3516 "Invalid token after text declaration not faulted", NULL,
3517 XML_ERROR_INVALID_TOKEN},
3518 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3519 "Unterminated string after text decl not faulted", NULL,
3520 XML_ERROR_UNCLOSED_TOKEN},
3521 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3522 "Partial UTF-8 character after text decl not faulted", NULL,
3523 XML_ERROR_PARTIAL_CHAR},
3524 {"%e1;", "Recursive parameter entity not faulted", NULL,
3525 XML_ERROR_RECURSIVE_ENTITY_REF},
3526 {NULL, NULL, NULL, XML_ERROR_NONE}};
3527 int i;
3528
3529 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3530 set_subtest("%s", data_004_2[i].parse_text);
3531 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3532 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3533 XML_SetUserData(g_parser, &data_004_2[i]);
3534 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3535 == XML_STATUS_ERROR)
3536 xml_failure(g_parser);
3537 XML_ParserReset(g_parser, NULL);
3538 }
3539 }
3540 END_TEST
3541
3542 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3543 START_TEST(test_ext_entity_not_standalone) {
3544 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3545 "<doc></doc>";
3546
3547 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3548 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3549 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3550 "Standalone rejection not caught");
3551 }
3552 END_TEST
3553
START_TEST(test_ext_entity_value_abort)3554 START_TEST(test_ext_entity_value_abort) {
3555 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3556 "<doc></doc>\n";
3557
3558 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3559 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3560 g_resumable = XML_FALSE;
3561 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3562 == XML_STATUS_ERROR)
3563 xml_failure(g_parser);
3564 }
3565 END_TEST
3566
START_TEST(test_bad_public_doctype)3567 START_TEST(test_bad_public_doctype) {
3568 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3569 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3570 "<doc></doc>";
3571
3572 /* Setting a handler provokes a particular code path */
3573 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3574 dummy_end_doctype_handler);
3575 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3576 }
3577 END_TEST
3578
3579 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3580 START_TEST(test_attribute_enum_value) {
3581 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3582 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3583 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3584 ExtTest dtd_data
3585 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3586 "<!ELEMENT a EMPTY>\n"
3587 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3588 NULL, NULL};
3589 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3590
3591 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3592 XML_SetUserData(g_parser, &dtd_data);
3593 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3594 /* An attribute list handler provokes a different code path */
3595 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3596 run_ext_character_check(text, &dtd_data, expected);
3597 }
3598 END_TEST
3599
3600 /* Slightly bizarrely, the library seems to silently ignore entity
3601 * definitions for predefined entities, even when they are wrong. The
3602 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3603 * to happen, so this is currently treated as acceptable.
3604 */
START_TEST(test_predefined_entity_redefinition)3605 START_TEST(test_predefined_entity_redefinition) {
3606 const char *text = "<!DOCTYPE doc [\n"
3607 "<!ENTITY apos 'foo'>\n"
3608 "]>\n"
3609 "<doc>'</doc>";
3610 run_character_check(text, XCS("'"));
3611 }
3612 END_TEST
3613
3614 /* Test that the parser stops processing the DTD after an unresolved
3615 * parameter entity is encountered.
3616 */
START_TEST(test_dtd_stop_processing)3617 START_TEST(test_dtd_stop_processing) {
3618 const char *text = "<!DOCTYPE doc [\n"
3619 "%foo;\n"
3620 "<!ENTITY bar 'bas'>\n"
3621 "]><doc/>";
3622
3623 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3624 init_dummy_handlers();
3625 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3626 == XML_STATUS_ERROR)
3627 xml_failure(g_parser);
3628 if (get_dummy_handler_flags() != 0)
3629 fail("DTD processing still going after undefined PE");
3630 }
3631 END_TEST
3632
3633 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3634 START_TEST(test_public_notation_no_sysid) {
3635 const char *text = "<!DOCTYPE doc [\n"
3636 "<!NOTATION note PUBLIC 'foo'>\n"
3637 "<!ELEMENT doc EMPTY>\n"
3638 "]>\n<doc/>";
3639
3640 init_dummy_handlers();
3641 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3642 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3643 == XML_STATUS_ERROR)
3644 xml_failure(g_parser);
3645 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3646 fail("Notation declaration handler not called");
3647 }
3648 END_TEST
3649
START_TEST(test_nested_groups)3650 START_TEST(test_nested_groups) {
3651 const char *text
3652 = "<!DOCTYPE doc [\n"
3653 "<!ELEMENT doc "
3654 /* Sixteen elements per line */
3655 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3656 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3657 "))))))))))))))))))))))))))))))))>\n"
3658 "<!ELEMENT e EMPTY>"
3659 "]>\n"
3660 "<doc><e/></doc>";
3661 CharData storage;
3662
3663 CharData_Init(&storage);
3664 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3665 XML_SetStartElementHandler(g_parser, record_element_start_handler);
3666 XML_SetUserData(g_parser, &storage);
3667 init_dummy_handlers();
3668 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3669 == XML_STATUS_ERROR)
3670 xml_failure(g_parser);
3671 CharData_CheckXMLChars(&storage, XCS("doce"));
3672 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3673 fail("Element handler not fired");
3674 }
3675 END_TEST
3676
START_TEST(test_group_choice)3677 START_TEST(test_group_choice) {
3678 const char *text = "<!DOCTYPE doc [\n"
3679 "<!ELEMENT doc (a|b|c)+>\n"
3680 "<!ELEMENT a EMPTY>\n"
3681 "<!ELEMENT b (#PCDATA)>\n"
3682 "<!ELEMENT c ANY>\n"
3683 "]>\n"
3684 "<doc>\n"
3685 "<a/>\n"
3686 "<b attr='foo'>This is a foo</b>\n"
3687 "<c></c>\n"
3688 "</doc>\n";
3689
3690 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3691 init_dummy_handlers();
3692 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3693 == XML_STATUS_ERROR)
3694 xml_failure(g_parser);
3695 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3696 fail("Element handler flag not raised");
3697 }
3698 END_TEST
3699
START_TEST(test_standalone_parameter_entity)3700 START_TEST(test_standalone_parameter_entity) {
3701 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3702 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3703 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3704 "%entity;\n"
3705 "]>\n"
3706 "<doc></doc>";
3707 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3708
3709 XML_SetUserData(g_parser, dtd_data);
3710 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3711 XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3712 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3713 == XML_STATUS_ERROR)
3714 xml_failure(g_parser);
3715 }
3716 END_TEST
3717
3718 /* Test skipping of parameter entity in an external DTD */
3719 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3720 START_TEST(test_skipped_parameter_entity) {
3721 const char *text = "<?xml version='1.0'?>\n"
3722 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3723 "<!ELEMENT root (#PCDATA|a)* >\n"
3724 "]>\n"
3725 "<root></root>";
3726 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3727
3728 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3729 XML_SetUserData(g_parser, &dtd_data);
3730 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3731 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3732 init_dummy_handlers();
3733 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3734 == XML_STATUS_ERROR)
3735 xml_failure(g_parser);
3736 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3737 fail("Skip handler not executed");
3738 }
3739 END_TEST
3740
3741 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3742 START_TEST(test_recursive_external_parameter_entity) {
3743 const char *text = "<?xml version='1.0'?>\n"
3744 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3745 "<!ELEMENT root (#PCDATA|a)* >\n"
3746 "]>\n"
3747 "<root></root>";
3748 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;",
3749 "Recursive external parameter entity not faulted", NULL,
3750 XML_ERROR_RECURSIVE_ENTITY_REF};
3751
3752 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3753 XML_SetUserData(g_parser, &dtd_data);
3754 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3755 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3756 "Recursive external parameter not spotted");
3757 }
3758 END_TEST
3759
3760 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3761 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3762 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3763 "<doc></doc>\n";
3764
3765 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3766 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3767 XML_SetUserData(g_parser, NULL);
3768 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3769 == XML_STATUS_ERROR)
3770 xml_failure(g_parser);
3771
3772 /* Now repeat without the external entity ref handler invoking
3773 * another copy of itself.
3774 */
3775 XML_ParserReset(g_parser, NULL);
3776 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3777 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3778 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3779 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3780 == XML_STATUS_ERROR)
3781 xml_failure(g_parser);
3782 }
3783 END_TEST
3784
3785 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3786 START_TEST(test_suspend_xdecl) {
3787 const char *text = long_character_data_text;
3788
3789 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3790 XML_SetUserData(g_parser, g_parser);
3791 g_resumable = XML_TRUE;
3792 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3793 // we won't know exactly how much input we actually managed to give Expat.
3794 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3795 != XML_STATUS_SUSPENDED)
3796 xml_failure(g_parser);
3797 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3798 xml_failure(g_parser);
3799 /* Attempt to start a new parse while suspended */
3800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3801 != XML_STATUS_ERROR)
3802 fail("Attempt to parse while suspended not faulted");
3803 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3804 fail("Suspended parse not faulted with correct error");
3805 }
3806 END_TEST
3807
3808 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3809 START_TEST(test_abort_epilog) {
3810 const char *text = "<doc></doc>\n\r\n";
3811 XML_Char trigger_char = XCS('\r');
3812
3813 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3814 XML_SetUserData(g_parser, &trigger_char);
3815 g_resumable = XML_FALSE;
3816 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3817 != XML_STATUS_ERROR)
3818 fail("Abort not triggered");
3819 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3820 xml_failure(g_parser);
3821 }
3822 END_TEST
3823
3824 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3825 START_TEST(test_abort_epilog_2) {
3826 const char *text = "<doc></doc>\n";
3827 XML_Char trigger_char = XCS('\n');
3828
3829 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3830 XML_SetUserData(g_parser, &trigger_char);
3831 g_resumable = XML_FALSE;
3832 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3833 }
3834 END_TEST
3835
3836 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3837 START_TEST(test_suspend_epilog) {
3838 const char *text = "<doc></doc>\n";
3839 XML_Char trigger_char = XCS('\n');
3840
3841 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3842 XML_SetUserData(g_parser, &trigger_char);
3843 g_resumable = XML_TRUE;
3844 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3845 != XML_STATUS_SUSPENDED)
3846 xml_failure(g_parser);
3847 }
3848 END_TEST
3849
START_TEST(test_suspend_in_sole_empty_tag)3850 START_TEST(test_suspend_in_sole_empty_tag) {
3851 const char *text = "<doc/>";
3852 enum XML_Status rc;
3853
3854 XML_SetEndElementHandler(g_parser, suspending_end_handler);
3855 XML_SetUserData(g_parser, g_parser);
3856 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3857 if (rc == XML_STATUS_ERROR)
3858 xml_failure(g_parser);
3859 else if (rc != XML_STATUS_SUSPENDED)
3860 fail("Suspend not triggered");
3861 rc = XML_ResumeParser(g_parser);
3862 if (rc == XML_STATUS_ERROR)
3863 xml_failure(g_parser);
3864 else if (rc != XML_STATUS_OK)
3865 fail("Resume failed");
3866 }
3867 END_TEST
3868
START_TEST(test_unfinished_epilog)3869 START_TEST(test_unfinished_epilog) {
3870 const char *text = "<doc></doc><";
3871
3872 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3873 "Incomplete epilog entry not faulted");
3874 }
3875 END_TEST
3876
START_TEST(test_partial_char_in_epilog)3877 START_TEST(test_partial_char_in_epilog) {
3878 const char *text = "<doc></doc>\xe2\x82";
3879
3880 /* First check that no fault is raised if the parse is not finished */
3881 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3882 == XML_STATUS_ERROR)
3883 xml_failure(g_parser);
3884 /* Now check that it is faulted once we finish */
3885 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3886 fail("Partial character in epilog not faulted");
3887 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3888 xml_failure(g_parser);
3889 }
3890 END_TEST
3891
3892 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3893 START_TEST(test_suspend_resume_internal_entity) {
3894 const char *text
3895 = "<!DOCTYPE doc [\n"
3896 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3897 "]>\n"
3898 "<doc>&foo;</doc>\n";
3899 const XML_Char *expected1 = XCS("Hi");
3900 const XML_Char *expected2 = XCS("HiHo");
3901 CharData storage;
3902
3903 CharData_Init(&storage);
3904 XML_SetStartElementHandler(g_parser, start_element_suspender);
3905 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3906 XML_SetUserData(g_parser, &storage);
3907 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3908 // we won't know exactly how much input we actually managed to give Expat.
3909 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3910 != XML_STATUS_SUSPENDED)
3911 xml_failure(g_parser);
3912 CharData_CheckXMLChars(&storage, XCS(""));
3913 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3914 xml_failure(g_parser);
3915 CharData_CheckXMLChars(&storage, expected1);
3916 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3917 xml_failure(g_parser);
3918 CharData_CheckXMLChars(&storage, expected2);
3919 }
3920 END_TEST
3921
START_TEST(test_suspend_resume_internal_entity_issue_629)3922 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3923 const char *const text
3924 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3925 "<"
3926 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3927 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3928 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3929 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3930 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3931 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3932 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3933 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3934 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3935 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3936 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3937 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3938 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3939 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3940 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3941 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3942 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3943 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3944 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3945 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3946 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3947 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3948 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3949 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3950 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3951 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3952 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3953 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3954 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3955 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3956 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3957 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3958 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3959 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3960 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3961 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3962 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3963 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3964 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3965 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3966 "/>"
3967 "</b></a>";
3968 const size_t firstChunkSizeBytes = 54;
3969
3970 XML_Parser parser = XML_ParserCreate(NULL);
3971 XML_SetUserData(parser, parser);
3972 XML_SetCommentHandler(parser, suspending_comment_handler);
3973
3974 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3975 != XML_STATUS_SUSPENDED)
3976 xml_failure(parser);
3977 if (XML_ResumeParser(parser) != XML_STATUS_OK)
3978 xml_failure(parser);
3979 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3980 (int)(strlen(text) - firstChunkSizeBytes),
3981 XML_TRUE)
3982 != XML_STATUS_OK)
3983 xml_failure(parser);
3984 XML_ParserFree(parser);
3985 }
3986 END_TEST
3987
3988 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3989 START_TEST(test_resume_entity_with_syntax_error) {
3990 if (g_chunkSize != 0) {
3991 // this test does not use SINGLE_BYTES, because of suspension
3992 return;
3993 }
3994
3995 const char *text = "<!DOCTYPE doc [\n"
3996 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3997 "]>\n"
3998 "<doc>&foo;</doc>\n";
3999
4000 XML_SetStartElementHandler(g_parser, start_element_suspender);
4001 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4002 // we won't know exactly how much input we actually managed to give Expat.
4003 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4004 != XML_STATUS_SUSPENDED)
4005 xml_failure(g_parser);
4006 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
4007 fail("Syntax error in entity not faulted");
4008 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
4009 xml_failure(g_parser);
4010 }
4011 END_TEST
4012
4013 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)4014 START_TEST(test_suspend_resume_parameter_entity) {
4015 const char *text = "<!DOCTYPE doc [\n"
4016 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
4017 "%foo;\n"
4018 "]>\n"
4019 "<doc>Hello, world</doc>";
4020 const XML_Char *expected = XCS("Hello, world");
4021 CharData storage;
4022
4023 CharData_Init(&storage);
4024 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4025 XML_SetElementDeclHandler(g_parser, element_decl_suspender);
4026 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4027 XML_SetUserData(g_parser, &storage);
4028 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4029 != XML_STATUS_SUSPENDED)
4030 xml_failure(g_parser);
4031 CharData_CheckXMLChars(&storage, XCS(""));
4032 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4033 xml_failure(g_parser);
4034 CharData_CheckXMLChars(&storage, expected);
4035 }
4036 END_TEST
4037
4038 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)4039 START_TEST(test_restart_on_error) {
4040 const char *text = "<$doc><doc></doc>";
4041
4042 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4043 != XML_STATUS_ERROR)
4044 fail("Invalid tag name not faulted");
4045 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4046 xml_failure(g_parser);
4047 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
4048 fail("Restarting invalid parse not faulted");
4049 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4050 xml_failure(g_parser);
4051 }
4052 END_TEST
4053
4054 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)4055 START_TEST(test_reject_lt_in_attribute_value) {
4056 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
4057 "<doc></doc>";
4058
4059 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4060 "Bad attribute default not faulted");
4061 }
4062 END_TEST
4063
START_TEST(test_reject_unfinished_param_in_att_value)4064 START_TEST(test_reject_unfinished_param_in_att_value) {
4065 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
4066 "<doc></doc>";
4067
4068 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4069 "Bad attribute default not faulted");
4070 }
4071 END_TEST
4072
START_TEST(test_trailing_cr_in_att_value)4073 START_TEST(test_trailing_cr_in_att_value) {
4074 const char *text = "<doc a='value\r'/>";
4075
4076 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4077 == XML_STATUS_ERROR)
4078 xml_failure(g_parser);
4079 }
4080 END_TEST
4081
4082 /* Try parsing a general entity within a parameter entity in a
4083 * standalone internal DTD. Covers a corner case in the parser.
4084 */
START_TEST(test_standalone_internal_entity)4085 START_TEST(test_standalone_internal_entity) {
4086 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
4087 "<!DOCTYPE doc [\n"
4088 " <!ELEMENT doc (#PCDATA)>\n"
4089 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n"
4090 " <!ENTITY ge 'AttDefaultValue'>\n"
4091 " %pe;\n"
4092 "]>\n"
4093 "<doc att2='any'/>";
4094
4095 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4096 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4097 == XML_STATUS_ERROR)
4098 xml_failure(g_parser);
4099 }
4100 END_TEST
4101
4102 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)4103 START_TEST(test_skipped_external_entity) {
4104 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4105 "<doc></doc>\n";
4106 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
4107 "<!ENTITY % e2 '%e1;'>\n",
4108 NULL, NULL};
4109
4110 XML_SetUserData(g_parser, &test_data);
4111 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4112 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4113 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4114 == XML_STATUS_ERROR)
4115 xml_failure(g_parser);
4116 }
4117 END_TEST
4118
4119 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)4120 START_TEST(test_skipped_null_loaded_ext_entity) {
4121 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4122 "<doc />";
4123 ExtHdlrData test_data
4124 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4125 "<!ENTITY % pe2 '%pe1;'>\n"
4126 "%pe2;\n",
4127 external_entity_null_loader, NULL};
4128
4129 XML_SetUserData(g_parser, &test_data);
4130 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4131 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4132 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4133 == XML_STATUS_ERROR)
4134 xml_failure(g_parser);
4135 }
4136 END_TEST
4137
START_TEST(test_skipped_unloaded_ext_entity)4138 START_TEST(test_skipped_unloaded_ext_entity) {
4139 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4140 "<doc />";
4141 ExtHdlrData test_data
4142 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4143 "<!ENTITY % pe2 '%pe1;'>\n"
4144 "%pe2;\n",
4145 NULL, NULL};
4146
4147 XML_SetUserData(g_parser, &test_data);
4148 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4149 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4150 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4151 == XML_STATUS_ERROR)
4152 xml_failure(g_parser);
4153 }
4154 END_TEST
4155
4156 /* Test that a parameter entity value ending with a carriage return
4157 * has it translated internally into a newline.
4158 */
START_TEST(test_param_entity_with_trailing_cr)4159 START_TEST(test_param_entity_with_trailing_cr) {
4160 #define PARAM_ENTITY_NAME "pe"
4161 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
4162 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4163 "<doc/>";
4164 ExtTest test_data
4165 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4166 "%" PARAM_ENTITY_NAME ";\n",
4167 NULL, NULL};
4168
4169 XML_SetUserData(g_parser, &test_data);
4170 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4171 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4172 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4173 param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4174 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4175 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4176 == XML_STATUS_ERROR)
4177 xml_failure(g_parser);
4178 int entity_match_flag = get_param_entity_match_flag();
4179 if (entity_match_flag == ENTITY_MATCH_FAIL)
4180 fail("Parameter entity CR->NEWLINE conversion failed");
4181 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4182 fail("Parameter entity not parsed");
4183 }
4184 #undef PARAM_ENTITY_NAME
4185 #undef PARAM_ENTITY_CORE_VALUE
4186 END_TEST
4187
START_TEST(test_invalid_character_entity)4188 START_TEST(test_invalid_character_entity) {
4189 const char *text = "<!DOCTYPE doc [\n"
4190 " <!ENTITY entity '�'>\n"
4191 "]>\n"
4192 "<doc>&entity;</doc>";
4193
4194 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4195 "Out of range character reference not faulted");
4196 }
4197 END_TEST
4198
START_TEST(test_invalid_character_entity_2)4199 START_TEST(test_invalid_character_entity_2) {
4200 const char *text = "<!DOCTYPE doc [\n"
4201 " <!ENTITY entity '&#xg0;'>\n"
4202 "]>\n"
4203 "<doc>&entity;</doc>";
4204
4205 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4206 "Out of range character reference not faulted");
4207 }
4208 END_TEST
4209
START_TEST(test_invalid_character_entity_3)4210 START_TEST(test_invalid_character_entity_3) {
4211 const char text[] =
4212 /* <!DOCTYPE doc [\n */
4213 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4214 /* U+0E04 = KHO KHWAI
4215 * U+0E08 = CHO CHAN */
4216 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4217 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4218 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4219 /* ]>\n */
4220 "\0]\0>\0\n"
4221 /* <doc>&entity;</doc> */
4222 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4223
4224 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4225 != XML_STATUS_ERROR)
4226 fail("Invalid start of entity name not faulted");
4227 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4228 xml_failure(g_parser);
4229 }
4230 END_TEST
4231
START_TEST(test_invalid_character_entity_4)4232 START_TEST(test_invalid_character_entity_4) {
4233 const char *text = "<!DOCTYPE doc [\n"
4234 " <!ENTITY entity '�'>\n" /* = � */
4235 "]>\n"
4236 "<doc>&entity;</doc>";
4237
4238 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4239 "Out of range character reference not faulted");
4240 }
4241 END_TEST
4242
4243 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4244 START_TEST(test_pi_handled_in_default) {
4245 const char *text = "<?test processing instruction?>\n<doc/>";
4246 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4247 CharData storage;
4248
4249 CharData_Init(&storage);
4250 XML_SetDefaultHandler(g_parser, accumulate_characters);
4251 XML_SetUserData(g_parser, &storage);
4252 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4253 == XML_STATUS_ERROR)
4254 xml_failure(g_parser);
4255 CharData_CheckXMLChars(&storage, expected);
4256 }
4257 END_TEST
4258
4259 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4260 START_TEST(test_comment_handled_in_default) {
4261 const char *text = "<!-- This is a comment -->\n<doc/>";
4262 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4263 CharData storage;
4264
4265 CharData_Init(&storage);
4266 XML_SetDefaultHandler(g_parser, accumulate_characters);
4267 XML_SetUserData(g_parser, &storage);
4268 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4269 == XML_STATUS_ERROR)
4270 xml_failure(g_parser);
4271 CharData_CheckXMLChars(&storage, expected);
4272 }
4273 END_TEST
4274
4275 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4276 START_TEST(test_pi_yml) {
4277 const char *text = "<?yml something like data?><doc/>";
4278 const XML_Char *expected = XCS("yml: something like data\n");
4279 CharData storage;
4280
4281 CharData_Init(&storage);
4282 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4283 XML_SetUserData(g_parser, &storage);
4284 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4285 == XML_STATUS_ERROR)
4286 xml_failure(g_parser);
4287 CharData_CheckXMLChars(&storage, expected);
4288 }
4289 END_TEST
4290
START_TEST(test_pi_xnl)4291 START_TEST(test_pi_xnl) {
4292 const char *text = "<?xnl nothing like data?><doc/>";
4293 const XML_Char *expected = XCS("xnl: nothing like data\n");
4294 CharData storage;
4295
4296 CharData_Init(&storage);
4297 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4298 XML_SetUserData(g_parser, &storage);
4299 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4300 == XML_STATUS_ERROR)
4301 xml_failure(g_parser);
4302 CharData_CheckXMLChars(&storage, expected);
4303 }
4304 END_TEST
4305
START_TEST(test_pi_xmm)4306 START_TEST(test_pi_xmm) {
4307 const char *text = "<?xmm everything like data?><doc/>";
4308 const XML_Char *expected = XCS("xmm: everything like data\n");
4309 CharData storage;
4310
4311 CharData_Init(&storage);
4312 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4313 XML_SetUserData(g_parser, &storage);
4314 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4315 == XML_STATUS_ERROR)
4316 xml_failure(g_parser);
4317 CharData_CheckXMLChars(&storage, expected);
4318 }
4319 END_TEST
4320
START_TEST(test_utf16_pi)4321 START_TEST(test_utf16_pi) {
4322 const char text[] =
4323 /* <?{KHO KHWAI}{CHO CHAN}?>
4324 * where {KHO KHWAI} = U+0E04
4325 * and {CHO CHAN} = U+0E08
4326 */
4327 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4328 /* <q/> */
4329 "<\0q\0/\0>\0";
4330 #ifdef XML_UNICODE
4331 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4332 #else
4333 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4334 #endif
4335 CharData storage;
4336
4337 CharData_Init(&storage);
4338 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4339 XML_SetUserData(g_parser, &storage);
4340 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4341 == XML_STATUS_ERROR)
4342 xml_failure(g_parser);
4343 CharData_CheckXMLChars(&storage, expected);
4344 }
4345 END_TEST
4346
START_TEST(test_utf16_be_pi)4347 START_TEST(test_utf16_be_pi) {
4348 const char text[] =
4349 /* <?{KHO KHWAI}{CHO CHAN}?>
4350 * where {KHO KHWAI} = U+0E04
4351 * and {CHO CHAN} = U+0E08
4352 */
4353 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4354 /* <q/> */
4355 "\0<\0q\0/\0>";
4356 #ifdef XML_UNICODE
4357 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4358 #else
4359 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4360 #endif
4361 CharData storage;
4362
4363 CharData_Init(&storage);
4364 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4365 XML_SetUserData(g_parser, &storage);
4366 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4367 == XML_STATUS_ERROR)
4368 xml_failure(g_parser);
4369 CharData_CheckXMLChars(&storage, expected);
4370 }
4371 END_TEST
4372
4373 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4374 START_TEST(test_utf16_be_comment) {
4375 const char text[] =
4376 /* <!-- Comment A --> */
4377 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4378 /* <doc/> */
4379 "\0<\0d\0o\0c\0/\0>";
4380 const XML_Char *expected = XCS(" Comment A ");
4381 CharData storage;
4382
4383 CharData_Init(&storage);
4384 XML_SetCommentHandler(g_parser, accumulate_comment);
4385 XML_SetUserData(g_parser, &storage);
4386 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4387 == XML_STATUS_ERROR)
4388 xml_failure(g_parser);
4389 CharData_CheckXMLChars(&storage, expected);
4390 }
4391 END_TEST
4392
START_TEST(test_utf16_le_comment)4393 START_TEST(test_utf16_le_comment) {
4394 const char text[] =
4395 /* <!-- Comment B --> */
4396 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4397 /* <doc/> */
4398 "<\0d\0o\0c\0/\0>\0";
4399 const XML_Char *expected = XCS(" Comment B ");
4400 CharData storage;
4401
4402 CharData_Init(&storage);
4403 XML_SetCommentHandler(g_parser, accumulate_comment);
4404 XML_SetUserData(g_parser, &storage);
4405 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4406 == XML_STATUS_ERROR)
4407 xml_failure(g_parser);
4408 CharData_CheckXMLChars(&storage, expected);
4409 }
4410 END_TEST
4411
4412 /* Test that the unknown encoding handler with map entries that expect
4413 * conversion but no conversion function is faulted
4414 */
START_TEST(test_missing_encoding_conversion_fn)4415 START_TEST(test_missing_encoding_conversion_fn) {
4416 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4417 "<doc>\x81</doc>";
4418
4419 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4420 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4421 * character introducing a two-byte sequence. For this, it
4422 * requires a convert function. The above function call doesn't
4423 * pass one through, so when BadEncodingHandler actually gets
4424 * called it should supply an invalid encoding.
4425 */
4426 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4427 "Encoding with missing convert() not faulted");
4428 }
4429 END_TEST
4430
START_TEST(test_failing_encoding_conversion_fn)4431 START_TEST(test_failing_encoding_conversion_fn) {
4432 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4433 "<doc>\x81</doc>";
4434
4435 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4436 /* BadEncodingHandler sets up an encoding with every top-bit-set
4437 * character introducing a two-byte sequence. For this, it
4438 * requires a convert function. The above function call passes
4439 * one that insists all possible sequences are invalid anyway.
4440 */
4441 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4442 "Encoding with failing convert() not faulted");
4443 }
4444 END_TEST
4445
4446 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4447 START_TEST(test_unknown_encoding_success) {
4448 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4449 /* Equivalent to <eoc>Hello, world</eoc> */
4450 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4451
4452 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4453 run_character_check(text, XCS("Hello, world"));
4454 }
4455 END_TEST
4456
4457 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4458 START_TEST(test_unknown_encoding_bad_name) {
4459 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4460 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4461
4462 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4463 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4464 "Bad name start in unknown encoding not faulted");
4465 }
4466 END_TEST
4467
4468 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4469 START_TEST(test_unknown_encoding_bad_name_2) {
4470 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4471 "<d\xffoc>Hello, world</d\xffoc>";
4472
4473 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4474 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4475 "Bad name in unknown encoding not faulted");
4476 }
4477 END_TEST
4478
4479 /* Test element name that is long enough to fill the conversion buffer
4480 * in an unknown encoding, finishing with an encoded character.
4481 */
START_TEST(test_unknown_encoding_long_name_1)4482 START_TEST(test_unknown_encoding_long_name_1) {
4483 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4484 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4485 "Hi"
4486 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4487 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4488 CharData storage;
4489
4490 CharData_Init(&storage);
4491 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4492 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4493 XML_SetUserData(g_parser, &storage);
4494 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4495 == XML_STATUS_ERROR)
4496 xml_failure(g_parser);
4497 CharData_CheckXMLChars(&storage, expected);
4498 }
4499 END_TEST
4500
4501 /* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
4503 */
START_TEST(test_unknown_encoding_long_name_2)4504 START_TEST(test_unknown_encoding_long_name_2) {
4505 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4506 "<abcdefghabcdefghabcdefghijklmnop>"
4507 "Hi"
4508 "</abcdefghabcdefghabcdefghijklmnop>";
4509 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4510 CharData storage;
4511
4512 CharData_Init(&storage);
4513 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4514 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4515 XML_SetUserData(g_parser, &storage);
4516 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4517 == XML_STATUS_ERROR)
4518 xml_failure(g_parser);
4519 CharData_CheckXMLChars(&storage, expected);
4520 }
4521 END_TEST
4522
START_TEST(test_invalid_unknown_encoding)4523 START_TEST(test_invalid_unknown_encoding) {
4524 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4525 "<doc>Hello world</doc>";
4526
4527 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4528 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4529 "Invalid unknown encoding not faulted");
4530 }
4531 END_TEST
4532
START_TEST(test_unknown_ascii_encoding_ok)4533 START_TEST(test_unknown_ascii_encoding_ok) {
4534 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4535 "<doc>Hello, world</doc>";
4536
4537 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4538 run_character_check(text, XCS("Hello, world"));
4539 }
4540 END_TEST
4541
START_TEST(test_unknown_ascii_encoding_fail)4542 START_TEST(test_unknown_ascii_encoding_fail) {
4543 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4544 "<doc>Hello, \x80 world</doc>";
4545
4546 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4547 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4548 "Invalid character not faulted");
4549 }
4550 END_TEST
4551
START_TEST(test_unknown_encoding_invalid_length)4552 START_TEST(test_unknown_encoding_invalid_length) {
4553 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4554 "<doc>Hello, world</doc>";
4555
4556 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4557 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4558 "Invalid unknown encoding not faulted");
4559 }
4560 END_TEST
4561
START_TEST(test_unknown_encoding_invalid_topbit)4562 START_TEST(test_unknown_encoding_invalid_topbit) {
4563 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4564 "<doc>Hello, world</doc>";
4565
4566 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4567 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4568 "Invalid unknown encoding not faulted");
4569 }
4570 END_TEST
4571
START_TEST(test_unknown_encoding_invalid_surrogate)4572 START_TEST(test_unknown_encoding_invalid_surrogate) {
4573 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4574 "<doc>Hello, \x82 world</doc>";
4575
4576 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4577 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4578 "Invalid unknown encoding not faulted");
4579 }
4580 END_TEST
4581
START_TEST(test_unknown_encoding_invalid_high)4582 START_TEST(test_unknown_encoding_invalid_high) {
4583 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4584 "<doc>Hello, world</doc>";
4585
4586 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4587 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4588 "Invalid unknown encoding not faulted");
4589 }
4590 END_TEST
4591
START_TEST(test_unknown_encoding_invalid_attr_value)4592 START_TEST(test_unknown_encoding_invalid_attr_value) {
4593 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4594 "<doc attr='\xff\x30'/>";
4595
4596 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4597 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4598 "Invalid attribute valid not faulted");
4599 }
4600 END_TEST
4601
START_TEST(test_unknown_encoding_user_data_primary)4602 START_TEST(test_unknown_encoding_user_data_primary) {
4603 // This test is based on ideas contributed by Artiphishell Inc.
4604 const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n"
4605 "<root />\n";
4606 XML_Parser parser = XML_ParserCreate(NULL);
4607 XML_SetUnknownEncodingHandler(parser,
4608 user_data_checking_unknown_encoding_handler,
4609 (void *)(intptr_t)0xC0FFEE);
4610
4611 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
4612 == XML_STATUS_OK);
4613
4614 XML_ParserFree(parser);
4615 }
4616 END_TEST
4617
START_TEST(test_unknown_encoding_user_data_secondary)4618 START_TEST(test_unknown_encoding_user_data_secondary) {
4619 // This test is based on ideas contributed by Artiphishell Inc.
4620 const char *const text_main = "<!DOCTYPE r [\n"
4621 " <!ENTITY ext SYSTEM 'ext.ent'>\n"
4622 "]>\n"
4623 "<r>&ext;</r>\n";
4624 const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n"
4625 "<e>data</e>";
4626 ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL};
4627 XML_Parser parser = XML_ParserCreate(NULL);
4628 XML_SetExternalEntityRefHandler(parser, external_entity_loader2);
4629 XML_SetUnknownEncodingHandler(parser,
4630 user_data_checking_unknown_encoding_handler,
4631 (void *)(intptr_t)0xC0FFEE);
4632 XML_SetUserData(parser, &test_data);
4633
4634 assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main),
4635 XML_TRUE)
4636 == XML_STATUS_OK);
4637
4638 XML_ParserFree(parser);
4639 }
4640 END_TEST
4641
/* Test that an external entity parser explicitly set to use Latin-1
 * handles a leading UTF-16 BOM correctly: the BOM must not select
 * UTF-16 when an explicit encoding has been given.
 */
START_TEST(test_ext_entity_latin1_utf16le_bom)4646 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4647 const char *text = "<!DOCTYPE doc [\n"
4648 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4649 "]>\n"
4650 "<doc>&en;</doc>";
4651 ExtTest2 test_data
4652 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4653 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4654 * 0x4c = L and 0x20 is a space
4655 */
4656 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4657 #ifdef XML_UNICODE
4658 const XML_Char *expected = XCS("\x00ff\x00feL ");
4659 #else
4660 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4661 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4662 #endif
4663 CharData storage;
4664
4665 CharData_Init(&storage);
4666 test_data.storage = &storage;
4667 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4668 XML_SetUserData(g_parser, &test_data);
4669 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4670 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4671 == XML_STATUS_ERROR)
4672 xml_failure(g_parser);
4673 CharData_CheckXMLChars(&storage, expected);
4674 }
4675 END_TEST
4676
START_TEST(test_ext_entity_latin1_utf16be_bom)4677 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4678 const char *text = "<!DOCTYPE doc [\n"
4679 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4680 "]>\n"
4681 "<doc>&en;</doc>";
4682 ExtTest2 test_data
4683 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4684 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4685 * 0x4c = L and 0x20 is a space
4686 */
4687 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4688 #ifdef XML_UNICODE
4689 const XML_Char *expected = XCS("\x00fe\x00ff L");
4690 #else
4691 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4692 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4693 #endif
4694 CharData storage;
4695
4696 CharData_Init(&storage);
4697 test_data.storage = &storage;
4698 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4699 XML_SetUserData(g_parser, &test_data);
4700 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4701 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4702 == XML_STATUS_ERROR)
4703 xml_failure(g_parser);
4704 CharData_CheckXMLChars(&storage, expected);
4705 }
4706 END_TEST
4707
4708 /* Parsing the full buffer rather than a byte at a time makes a
4709 * difference to the encoding scanning code, so repeat the above tests
4710 * without breaking them down by byte.
4711 */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4712 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4713 const char *text = "<!DOCTYPE doc [\n"
4714 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4715 "]>\n"
4716 "<doc>&en;</doc>";
4717 ExtTest2 test_data
4718 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4719 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4720 * 0x4c = L and 0x20 is a space
4721 */
4722 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4723 #ifdef XML_UNICODE
4724 const XML_Char *expected = XCS("\x00ff\x00feL ");
4725 #else
4726 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4727 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4728 #endif
4729 CharData storage;
4730
4731 CharData_Init(&storage);
4732 test_data.storage = &storage;
4733 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4734 XML_SetUserData(g_parser, &test_data);
4735 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4736 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4737 == XML_STATUS_ERROR)
4738 xml_failure(g_parser);
4739 CharData_CheckXMLChars(&storage, expected);
4740 }
4741 END_TEST
4742
START_TEST(test_ext_entity_latin1_utf16be_bom2)4743 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4744 const char *text = "<!DOCTYPE doc [\n"
4745 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4746 "]>\n"
4747 "<doc>&en;</doc>";
4748 ExtTest2 test_data
4749 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4750 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4751 * 0x4c = L and 0x20 is a space
4752 */
4753 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4754 #ifdef XML_UNICODE
4755 const XML_Char *expected = XCS("\x00fe\x00ff L");
4756 #else
4757 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4758 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4759 #endif
4760 CharData storage;
4761
4762 CharData_Init(&storage);
4763 test_data.storage = &storage;
4764 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4765 XML_SetUserData(g_parser, &test_data);
4766 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4767 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4768 == XML_STATUS_ERROR)
4769 xml_failure(g_parser);
4770 CharData_CheckXMLChars(&storage, expected);
4771 }
4772 END_TEST
4773
4774 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4775 START_TEST(test_ext_entity_utf16_be) {
4776 const char *text = "<!DOCTYPE doc [\n"
4777 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4778 "]>\n"
4779 "<doc>&en;</doc>";
4780 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4781 #ifdef XML_UNICODE
4782 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4783 #else
4784 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4785 "\xe6\x94\x80" /* U+6500 */
4786 "\xe2\xbc\x80" /* U+2F00 */
4787 "\xe3\xb8\x80"); /* U+3E00 */
4788 #endif
4789 CharData storage;
4790
4791 CharData_Init(&storage);
4792 test_data.storage = &storage;
4793 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4794 XML_SetUserData(g_parser, &test_data);
4795 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4796 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4797 == XML_STATUS_ERROR)
4798 xml_failure(g_parser);
4799 CharData_CheckXMLChars(&storage, expected);
4800 }
4801 END_TEST
4802
4803 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4804 START_TEST(test_ext_entity_utf16_le) {
4805 const char *text = "<!DOCTYPE doc [\n"
4806 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4807 "]>\n"
4808 "<doc>&en;</doc>";
4809 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4810 #ifdef XML_UNICODE
4811 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4812 #else
4813 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4814 "\xe6\x94\x80" /* U+6500 */
4815 "\xe2\xbc\x80" /* U+2F00 */
4816 "\xe3\xb8\x80"); /* U+3E00 */
4817 #endif
4818 CharData storage;
4819
4820 CharData_Init(&storage);
4821 test_data.storage = &storage;
4822 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4823 XML_SetUserData(g_parser, &test_data);
4824 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4825 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4826 == XML_STATUS_ERROR)
4827 xml_failure(g_parser);
4828 CharData_CheckXMLChars(&storage, expected);
4829 }
4830 END_TEST
4831
4832 /* Test little-endian UTF-16 given no explicit encoding.
4833 * The existing default encoding (UTF-8) is assumed to hold without a
4834 * BOM to contradict it, so the entity value will in fact provoke an
4835 * error because 0x00 is not a valid XML character. We parse the
4836 * whole buffer in one go rather than feeding it in byte by byte to
4837 * exercise different code paths in the initial scanning routines.
4838 */
START_TEST(test_ext_entity_utf16_unknown)4839 START_TEST(test_ext_entity_utf16_unknown) {
4840 const char *text = "<!DOCTYPE doc [\n"
4841 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4842 "]>\n"
4843 "<doc>&en;</doc>";
4844 ExtFaults2 test_data
4845 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4846 XML_ERROR_INVALID_TOKEN};
4847
4848 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4849 XML_SetUserData(g_parser, &test_data);
4850 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4851 "Invalid character should not have been accepted");
4852 }
4853 END_TEST
4854
4855 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4856 START_TEST(test_ext_entity_utf8_non_bom) {
4857 const char *text = "<!DOCTYPE doc [\n"
4858 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4859 "]>\n"
4860 "<doc>&en;</doc>";
4861 ExtTest2 test_data
4862 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4863 3, NULL, NULL};
4864 #ifdef XML_UNICODE
4865 const XML_Char *expected = XCS("\xfec0");
4866 #else
4867 const XML_Char *expected = XCS("\xef\xbb\x80");
4868 #endif
4869 CharData storage;
4870
4871 CharData_Init(&storage);
4872 test_data.storage = &storage;
4873 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4874 XML_SetUserData(g_parser, &test_data);
4875 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4876 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4877 == XML_STATUS_ERROR)
4878 xml_failure(g_parser);
4879 CharData_CheckXMLChars(&storage, expected);
4880 }
4881 END_TEST
4882
4883 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4884 START_TEST(test_utf8_in_cdata_section) {
4885 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4886 #ifdef XML_UNICODE
4887 const XML_Char *expected = XCS("one \x00e9 two");
4888 #else
4889 const XML_Char *expected = XCS("one \xc3\xa9 two");
4890 #endif
4891
4892 run_character_check(text, expected);
4893 }
4894 END_TEST
4895
4896 /* Test that little-endian UTF-16 in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)4897 START_TEST(test_utf8_in_cdata_section_2) {
4898 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4899 #ifdef XML_UNICODE
4900 const XML_Char *expected = XCS("\x00e9]\x00e9two");
4901 #else
4902 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4903 #endif
4904
4905 run_character_check(text, expected);
4906 }
4907 END_TEST
4908
START_TEST(test_utf8_in_start_tags)4909 START_TEST(test_utf8_in_start_tags) {
4910 struct test_case {
4911 bool goodName;
4912 bool goodNameStart;
4913 const char *tagName;
4914 };
4915
4916 // The idea with the tests below is this:
4917 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4918 // go to isNever and are hence not a concern.
4919 //
4920 // We start with a character that is a valid name character
4921 // (or even name-start character, see XML 1.0r4 spec) and then we flip
4922 // single bits at places where (1) the result leaves the UTF-8 encoding space
4923 // and (2) we stay in the same n-byte sequence family.
4924 //
4925 // The flipped bits are highlighted in angle brackets in comments,
4926 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4927 // the most significant bit to 1 to leave UTF-8 encoding space.
4928 struct test_case cases[] = {
4929 // 1-byte UTF-8: [0xxx xxxx]
4930 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
4931 {false, false, "\xBA"}, // [<1>011 1010]
4932 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
4933 {false, false, "\xB9"}, // [<1>011 1001]
4934
4935 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4936 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
4937 // Arabic small waw U+06E5
4938 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4939 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4940 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4941 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
4942 // combining char U+0301
4943 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4944 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4945 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4946
4947 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4948 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
4949 // Devanagari Letter A U+0905
4950 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4951 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4952 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4953 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4954 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4955 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
4956 // combining char U+0901
4957 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4958 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4959 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4960 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4961 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4962 };
4963 const bool atNameStart[] = {true, false};
4964
4965 size_t i = 0;
4966 char doc[1024];
4967 size_t failCount = 0;
4968
4969 // we need all the bytes to be parsed, but we don't want the errors that can
4970 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4971 if (g_reparseDeferralEnabledDefault) {
4972 return;
4973 }
4974
4975 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4976 size_t j = 0;
4977 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4978 const bool expectedSuccess
4979 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4980 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4981 cases[i].tagName);
4982 XML_Parser parser = XML_ParserCreate(NULL);
4983
4984 const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4985 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4986
4987 bool success = true;
4988 if ((status == XML_STATUS_OK) != expectedSuccess) {
4989 success = false;
4990 }
4991 if ((status == XML_STATUS_ERROR)
4992 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4993 success = false;
4994 }
4995
4996 if (! success) {
4997 fprintf(
4998 stderr,
4999 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
5000 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
5001 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
5002 failCount++;
5003 }
5004
5005 XML_ParserFree(parser);
5006 }
5007 }
5008
5009 if (failCount > 0) {
5010 fail("UTF-8 regression detected");
5011 }
5012 }
5013 END_TEST
5014
5015 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)5016 START_TEST(test_trailing_spaces_in_elements) {
5017 const char *text = "<doc >Hi</doc >";
5018 const XML_Char *expected = XCS("doc/doc");
5019 CharData storage;
5020
5021 CharData_Init(&storage);
5022 XML_SetElementHandler(g_parser, record_element_start_handler,
5023 record_element_end_handler);
5024 XML_SetUserData(g_parser, &storage);
5025 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5026 == XML_STATUS_ERROR)
5027 xml_failure(g_parser);
5028 CharData_CheckXMLChars(&storage, expected);
5029 }
5030 END_TEST
5031
START_TEST(test_utf16_attribute)5032 START_TEST(test_utf16_attribute) {
5033 const char text[] =
5034 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
5035 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5036 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5037 */
5038 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
5039 const XML_Char *expected = XCS("a");
5040 CharData storage;
5041
5042 CharData_Init(&storage);
5043 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5044 XML_SetUserData(g_parser, &storage);
5045 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5046 == XML_STATUS_ERROR)
5047 xml_failure(g_parser);
5048 CharData_CheckXMLChars(&storage, expected);
5049 }
5050 END_TEST
5051
START_TEST(test_utf16_second_attr)5052 START_TEST(test_utf16_second_attr) {
5053 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
5054 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5055 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5056 */
5057 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
5058 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
5059 const XML_Char *expected = XCS("1");
5060 CharData storage;
5061
5062 CharData_Init(&storage);
5063 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5064 XML_SetUserData(g_parser, &storage);
5065 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5066 == XML_STATUS_ERROR)
5067 xml_failure(g_parser);
5068 CharData_CheckXMLChars(&storage, expected);
5069 }
5070 END_TEST
5071
START_TEST(test_attr_after_solidus)5072 START_TEST(test_attr_after_solidus) {
5073 const char *text = "<doc attr1='a' / attr2='b'>";
5074
5075 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
5076 }
5077 END_TEST
5078
START_TEST(test_utf16_pe)5079 START_TEST(test_utf16_pe) {
5080 /* <!DOCTYPE doc [
5081 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
5082 * %{KHO KHWAI}{CHO CHAN};
5083 * ]>
5084 * <doc></doc>
5085 *
5086 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5087 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5088 */
5089 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
5090 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
5091 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
5092 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
5093 "\0%\x0e\x04\x0e\x08\0;\0\n"
5094 "\0]\0>\0\n"
5095 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
5096 #ifdef XML_UNICODE
5097 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
5098 #else
5099 const XML_Char *expected
5100 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
5101 #endif
5102 CharData storage;
5103
5104 CharData_Init(&storage);
5105 XML_SetUserData(g_parser, &storage);
5106 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
5107 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5108 == XML_STATUS_ERROR)
5109 xml_failure(g_parser);
5110 CharData_CheckXMLChars(&storage, expected);
5111 }
5112 END_TEST
5113
5114 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)5115 START_TEST(test_bad_attr_desc_keyword) {
5116 const char *text = "<!DOCTYPE doc [\n"
5117 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
5118 "]>\n"
5119 "<doc />";
5120
5121 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5122 "Bad keyword !IMPLIED not faulted");
5123 }
5124 END_TEST
5125
5126 /* Test that an invalid attribute description keyword consisting of
5127 * UTF-16 characters with their top bytes non-zero are correctly
5128 * faulted
5129 */
START_TEST(test_bad_attr_desc_keyword_utf16)5130 START_TEST(test_bad_attr_desc_keyword_utf16) {
5131 /* <!DOCTYPE d [
5132 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
5133 * ]><d/>
5134 *
5135 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5136 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5137 */
5138 const char text[]
5139 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5140 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
5141 "\0#\x0e\x04\x0e\x08\0>\0\n"
5142 "\0]\0>\0<\0d\0/\0>";
5143
5144 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5145 != XML_STATUS_ERROR)
5146 fail("Invalid UTF16 attribute keyword not faulted");
5147 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5148 xml_failure(g_parser);
5149 }
5150 END_TEST
5151
5152 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
5153 * using prefix-encoding (see above) to trigger specific code paths
5154 */
START_TEST(test_bad_doctype)5155 START_TEST(test_bad_doctype) {
5156 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
5157 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
5158
5159 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5160 expect_failure(text, XML_ERROR_SYNTAX,
5161 "Invalid bytes in DOCTYPE not faulted");
5162 }
5163 END_TEST
5164
START_TEST(test_bad_doctype_utf8)5165 START_TEST(test_bad_doctype_utf8) {
5166 const char *text = "<!DOCTYPE \xDB\x25"
5167 "doc><doc/>"; // [1101 1011] [<0>010 0101]
5168 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5169 "Invalid UTF-8 in DOCTYPE not faulted");
5170 }
5171 END_TEST
5172
START_TEST(test_bad_doctype_utf16)5173 START_TEST(test_bad_doctype_utf16) {
5174 const char text[] =
5175 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
5176 *
5177 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
5178 * (name character) but not a valid letter (name start character)
5179 */
5180 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
5181 "\x06\xf2"
5182 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
5183
5184 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5185 != XML_STATUS_ERROR)
5186 fail("Invalid bytes in DOCTYPE not faulted");
5187 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5188 xml_failure(g_parser);
5189 }
5190 END_TEST
5191
START_TEST(test_bad_doctype_plus)5192 START_TEST(test_bad_doctype_plus) {
5193 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
5194 "<1+>&foo;</1+>";
5195
5196 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5197 "'+' in document name not faulted");
5198 }
5199 END_TEST
5200
START_TEST(test_bad_doctype_star)5201 START_TEST(test_bad_doctype_star) {
5202 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
5203 "<1*>&foo;</1*>";
5204
5205 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5206 "'*' in document name not faulted");
5207 }
5208 END_TEST
5209
START_TEST(test_bad_doctype_query)5210 START_TEST(test_bad_doctype_query) {
5211 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5212 "<1?>&foo;</1?>";
5213
5214 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5215 "'?' in document name not faulted");
5216 }
5217 END_TEST
5218
START_TEST(test_unknown_encoding_bad_ignore)5219 START_TEST(test_unknown_encoding_bad_ignore) {
5220 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5221 "<!DOCTYPE doc SYSTEM 'foo'>"
5222 "<doc><e>&entity;</e></doc>";
5223 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5224 "Invalid character not faulted", XCS("prefix-conv"),
5225 XML_ERROR_INVALID_TOKEN};
5226
5227 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5228 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5229 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5230 XML_SetUserData(g_parser, &fault);
5231 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5232 "Bad IGNORE section with unknown encoding not failed");
5233 }
5234 END_TEST
5235
START_TEST(test_entity_in_utf16_be_attr)5236 START_TEST(test_entity_in_utf16_be_attr) {
5237 const char text[] =
5238 /* <e a='ä ä'></e> */
5239 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5240 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5241 #ifdef XML_UNICODE
5242 const XML_Char *expected = XCS("\x00e4 \x00e4");
5243 #else
5244 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5245 #endif
5246 CharData storage;
5247
5248 CharData_Init(&storage);
5249 XML_SetUserData(g_parser, &storage);
5250 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5251 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5252 == XML_STATUS_ERROR)
5253 xml_failure(g_parser);
5254 CharData_CheckXMLChars(&storage, expected);
5255 }
5256 END_TEST
5257
START_TEST(test_entity_in_utf16_le_attr)5258 START_TEST(test_entity_in_utf16_le_attr) {
5259 const char text[] =
5260 /* <e a='ä ä'></e> */
5261 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5262 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5263 #ifdef XML_UNICODE
5264 const XML_Char *expected = XCS("\x00e4 \x00e4");
5265 #else
5266 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5267 #endif
5268 CharData storage;
5269
5270 CharData_Init(&storage);
5271 XML_SetUserData(g_parser, &storage);
5272 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5273 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5274 == XML_STATUS_ERROR)
5275 xml_failure(g_parser);
5276 CharData_CheckXMLChars(&storage, expected);
5277 }
5278 END_TEST
5279
START_TEST(test_entity_public_utf16_be)5280 START_TEST(test_entity_public_utf16_be) {
5281 const char text[] =
5282 /* <!DOCTYPE d [ */
5283 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5284 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5285 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5286 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5287 /* %e; */
5288 "\0%\0e\0;\0\n"
5289 /* ]> */
5290 "\0]\0>\0\n"
5291 /* <d>&j;</d> */
5292 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5293 ExtTest2 test_data
5294 = {/* <!ENTITY j 'baz'> */
5295 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5296 const XML_Char *expected = XCS("baz");
5297 CharData storage;
5298
5299 CharData_Init(&storage);
5300 test_data.storage = &storage;
5301 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5302 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5303 XML_SetUserData(g_parser, &test_data);
5304 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5305 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5306 == XML_STATUS_ERROR)
5307 xml_failure(g_parser);
5308 CharData_CheckXMLChars(&storage, expected);
5309 }
5310 END_TEST
5311
START_TEST(test_entity_public_utf16_le)5312 START_TEST(test_entity_public_utf16_le) {
5313 const char text[] =
5314 /* <!DOCTYPE d [ */
5315 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5316 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5317 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5318 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5319 /* %e; */
5320 "%\0e\0;\0\n\0"
5321 /* ]> */
5322 "]\0>\0\n\0"
5323 /* <d>&j;</d> */
5324 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5325 ExtTest2 test_data
5326 = {/* <!ENTITY j 'baz'> */
5327 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5328 const XML_Char *expected = XCS("baz");
5329 CharData storage;
5330
5331 CharData_Init(&storage);
5332 test_data.storage = &storage;
5333 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5334 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5335 XML_SetUserData(g_parser, &test_data);
5336 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5337 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5338 == XML_STATUS_ERROR)
5339 xml_failure(g_parser);
5340 CharData_CheckXMLChars(&storage, expected);
5341 }
5342 END_TEST
5343
5344 /* Test that a doctype with neither an internal nor external subset is
5345 * faulted
5346 */
START_TEST(test_short_doctype)5347 START_TEST(test_short_doctype) {
5348 const char *text = "<!DOCTYPE doc></doc>";
5349 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5350 "DOCTYPE without subset not rejected");
5351 }
5352 END_TEST
5353
START_TEST(test_short_doctype_2)5354 START_TEST(test_short_doctype_2) {
5355 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5356 expect_failure(text, XML_ERROR_SYNTAX,
5357 "DOCTYPE without Public ID not rejected");
5358 }
5359 END_TEST
5360
START_TEST(test_short_doctype_3)5361 START_TEST(test_short_doctype_3) {
5362 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5363 expect_failure(text, XML_ERROR_SYNTAX,
5364 "DOCTYPE without System ID not rejected");
5365 }
5366 END_TEST
5367
START_TEST(test_long_doctype)5368 START_TEST(test_long_doctype) {
5369 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5370 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5371 }
5372 END_TEST
5373
START_TEST(test_bad_entity)5374 START_TEST(test_bad_entity) {
5375 const char *text = "<!DOCTYPE doc [\n"
5376 " <!ENTITY foo PUBLIC>\n"
5377 "]>\n"
5378 "<doc/>";
5379 expect_failure(text, XML_ERROR_SYNTAX,
5380 "ENTITY without Public ID is not rejected");
5381 }
5382 END_TEST
5383
5384 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5385 START_TEST(test_bad_entity_2) {
5386 const char *text = "<!DOCTYPE doc [\n"
5387 " <!ENTITY % foo bar>\n"
5388 "]>\n"
5389 "<doc/>";
5390 expect_failure(text, XML_ERROR_SYNTAX,
5391 "ENTITY without Public ID is not rejected");
5392 }
5393 END_TEST
5394
START_TEST(test_bad_entity_3)5395 START_TEST(test_bad_entity_3) {
5396 const char *text = "<!DOCTYPE doc [\n"
5397 " <!ENTITY % foo PUBLIC>\n"
5398 "]>\n"
5399 "<doc/>";
5400 expect_failure(text, XML_ERROR_SYNTAX,
5401 "Parameter ENTITY without Public ID is not rejected");
5402 }
5403 END_TEST
5404
START_TEST(test_bad_entity_4)5405 START_TEST(test_bad_entity_4) {
5406 const char *text = "<!DOCTYPE doc [\n"
5407 " <!ENTITY % foo SYSTEM>\n"
5408 "]>\n"
5409 "<doc/>";
5410 expect_failure(text, XML_ERROR_SYNTAX,
5411 "Parameter ENTITY without Public ID is not rejected");
5412 }
5413 END_TEST
5414
START_TEST(test_bad_notation)5415 START_TEST(test_bad_notation) {
5416 const char *text = "<!DOCTYPE doc [\n"
5417 " <!NOTATION n SYSTEM>\n"
5418 "]>\n"
5419 "<doc/>";
5420 expect_failure(text, XML_ERROR_SYNTAX,
5421 "Notation without System ID is not rejected");
5422 }
5423 END_TEST
5424
5425 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5426 START_TEST(test_default_doctype_handler) {
5427 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5428 " <!ENTITY foo 'bar'>\n"
5429 "]>\n"
5430 "<doc>&foo;</doc>";
5431 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5432 {XCS("'test.dtd'"), 10, XML_FALSE},
5433 {NULL, 0, XML_FALSE}};
5434 int i;
5435
5436 XML_SetUserData(g_parser, &test_data);
5437 XML_SetDefaultHandler(g_parser, checking_default_handler);
5438 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5439 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5440 == XML_STATUS_ERROR)
5441 xml_failure(g_parser);
5442 for (i = 0; test_data[i].expected != NULL; i++)
5443 if (! test_data[i].seen)
5444 fail("Default handler not run for public !DOCTYPE");
5445 }
5446 END_TEST
5447
START_TEST(test_empty_element_abort)5448 START_TEST(test_empty_element_abort) {
5449 const char *text = "<abort/>";
5450
5451 XML_SetStartElementHandler(g_parser, start_element_suspender);
5452 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5453 != XML_STATUS_ERROR)
5454 fail("Expected to error on abort");
5455 }
5456 END_TEST
5457
5458 /* Regression test for GH issue #612: unfinished m_declAttributeType
5459 * allocation in ->m_tempPool can corrupt following allocation.
5460 */
START_TEST(test_pool_integrity_with_unfinished_attr)5461 START_TEST(test_pool_integrity_with_unfinished_attr) {
5462 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5463 "<!DOCTYPE foo [\n"
5464 "<!ELEMENT foo ANY>\n"
5465 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5466 "%entp;\n"
5467 "]>\n"
5468 "<a></a>\n";
5469 const XML_Char *expected = XCS("COMMENT");
5470 CharData storage;
5471
5472 CharData_Init(&storage);
5473 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5474 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5475 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5476 XML_SetCommentHandler(g_parser, accumulate_comment);
5477 XML_SetUserData(g_parser, &storage);
5478 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5479 == XML_STATUS_ERROR)
5480 xml_failure(g_parser);
5481 CharData_CheckXMLChars(&storage, expected);
5482 }
5483 END_TEST
5484
/* Test a possible early return location in internalEntityProcessor.
 *
 * The document ends directly after the entity reference, with <foo>
 * still open; the final parse call must report XML_STATUS_ERROR with
 * error code XML_ERROR_NO_ELEMENTS.
 */
START_TEST(test_entity_ref_no_elements) {
  const char *const text = "<!DOCTYPE foo [\n"
                           "<!ENTITY e1 \"test\">\n"
                           "]> <foo>&e1;"; // intentionally missing newline

  /* Uses a parser of its own rather than the shared g_parser. */
  XML_Parser parser = XML_ParserCreate(NULL);
  assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
              == XML_STATUS_ERROR);
  assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
  XML_ParserFree(parser);
}
END_TEST
5498
5499 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5500 START_TEST(test_deep_nested_entity) {
5501 const size_t N_LINES = 60000;
5502 const size_t SIZE_PER_LINE = 50;
5503
5504 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5505 if (text == NULL) {
5506 fail("malloc failed");
5507 }
5508
5509 char *textPtr = text;
5510
5511 // Create the XML
5512 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5513 "<!DOCTYPE foo [\n"
5514 " <!ENTITY s0 'deepText'>\n");
5515
5516 for (size_t i = 1; i < N_LINES; ++i) {
5517 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5518 (long unsigned)i, (long unsigned)(i - 1));
5519 }
5520
5521 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5522 (long unsigned)(N_LINES - 1));
5523
5524 const XML_Char *const expected = XCS("deepText");
5525
5526 CharData storage;
5527 CharData_Init(&storage);
5528
5529 XML_Parser parser = XML_ParserCreate(NULL);
5530
5531 XML_SetCharacterDataHandler(parser, accumulate_characters);
5532 XML_SetUserData(parser, &storage);
5533
5534 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5535 == XML_STATUS_ERROR)
5536 xml_failure(parser);
5537
5538 CharData_CheckXMLChars(&storage, expected);
5539 XML_ParserFree(parser);
5540 free(text);
5541 }
5542 END_TEST
5543
5544 /* Tests if chained entity references in attributes
5545 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5546 START_TEST(test_deep_nested_attribute_entity) {
5547 const size_t N_LINES = 60000;
5548 const size_t SIZE_PER_LINE = 100;
5549
5550 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5551 if (text == NULL) {
5552 fail("malloc failed");
5553 }
5554
5555 char *textPtr = text;
5556
5557 // Create the XML
5558 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5559 "<!DOCTYPE foo [\n"
5560 " <!ENTITY s0 'deepText'>\n");
5561
5562 for (size_t i = 1; i < N_LINES; ++i) {
5563 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5564 (long unsigned)i, (long unsigned)(i - 1));
5565 }
5566
5567 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5568 (long unsigned)(N_LINES - 1));
5569
5570 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5571 ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
5572 info[0].attributes = doc_info;
5573
5574 XML_Parser parser = XML_ParserCreate(NULL);
5575 ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5576
5577 XML_SetStartElementHandler(parser, counting_start_element_handler);
5578 XML_SetUserData(parser, &parserPlusElemenInfo);
5579
5580 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5581 == XML_STATUS_ERROR)
5582 xml_failure(parser);
5583
5584 XML_ParserFree(parser);
5585 free(text);
5586 }
5587 END_TEST
5588
START_TEST(test_deep_nested_entity_delayed_interpretation)5589 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5590 const size_t N_LINES = 70000;
5591 const size_t SIZE_PER_LINE = 100;
5592
5593 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5594 if (text == NULL) {
5595 fail("malloc failed");
5596 }
5597
5598 char *textPtr = text;
5599
5600 // Create the XML
5601 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5602 "<!DOCTYPE foo [\n"
5603 " <!ENTITY %% s0 'deepText'>\n");
5604
5605 for (size_t i = 1; i < N_LINES; ++i) {
5606 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5607 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i,
5608 (long unsigned)(i - 1));
5609 }
5610
5611 snprintf(textPtr, SIZE_PER_LINE,
5612 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n"
5613 " %%define_g;\n"
5614 "]>\n"
5615 "<foo/>\n",
5616 (long unsigned)(N_LINES - 1));
5617
5618 XML_Parser parser = XML_ParserCreate(NULL);
5619
5620 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5621 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5622 == XML_STATUS_ERROR)
5623 xml_failure(parser);
5624
5625 XML_ParserFree(parser);
5626 free(text);
5627 }
5628 END_TEST
5629
START_TEST(test_nested_entity_suspend)5630 START_TEST(test_nested_entity_suspend) {
5631 const char *const text = "<!DOCTYPE a [\n"
5632 " <!ENTITY e1 '<!--e1-->'>\n"
5633 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5634 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5635 "]>\n"
5636 "<a><!--start-->&e3;<!--end--></a>";
5637 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5638 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5639 CharData storage;
5640 CharData_Init(&storage);
5641 XML_Parser parser = XML_ParserCreate(NULL);
5642 ParserPlusStorage parserPlusStorage = {parser, &storage};
5643
5644 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5645 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5646 XML_SetUserData(parser, &parserPlusStorage);
5647
5648 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5649 while (status == XML_STATUS_SUSPENDED) {
5650 status = XML_ResumeParser(parser);
5651 }
5652 if (status != XML_STATUS_OK)
5653 xml_failure(parser);
5654
5655 CharData_CheckXMLChars(&storage, expected);
5656 XML_ParserFree(parser);
5657 }
5658 END_TEST
5659
START_TEST(test_nested_entity_suspend_2)5660 START_TEST(test_nested_entity_suspend_2) {
5661 const char *const text = "<!DOCTYPE doc [\n"
5662 " <!ENTITY ge1 'head1Ztail1'>\n"
5663 " <!ENTITY ge2 'head2&ge1;tail2'>\n"
5664 " <!ENTITY ge3 'head3&ge2;tail3'>\n"
5665 "]>\n"
5666 "<doc>&ge3;</doc>";
5667 const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5668 XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5669 CharData storage;
5670 CharData_Init(&storage);
5671 XML_Parser parser = XML_ParserCreate(NULL);
5672 ParserPlusStorage parserPlusStorage = {parser, &storage};
5673
5674 XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5675 XML_SetUserData(parser, &parserPlusStorage);
5676
5677 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5678 while (status == XML_STATUS_SUSPENDED) {
5679 status = XML_ResumeParser(parser);
5680 }
5681 if (status != XML_STATUS_OK)
5682 xml_failure(parser);
5683
5684 CharData_CheckXMLChars(&storage, expected);
5685 XML_ParserFree(parser);
5686 }
5687 END_TEST
5688
5689 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5690 START_TEST(test_big_tokens_scale_linearly) {
5691 const struct {
5692 const char *pre;
5693 const char *post;
5694 } text[] = {
5695 {"<a>", "</a>"}, // assumed good, used as baseline
5696 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5697 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
5698 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
5699 {"<e><", "/></e>"}, // big elem name, used to be O(N²)
5700 };
5701 const int num_cases = sizeof(text) / sizeof(text[0]);
5702 char aaaaaa[4096];
5703 const int fillsize = (int)sizeof(aaaaaa);
5704 const int fillcount = 100;
5705 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5706 const unsigned max_factor = 4;
5707 const unsigned max_scanned = max_factor * approx_bytes;
5708
5709 memset(aaaaaa, 'a', fillsize);
5710
5711 if (! g_reparseDeferralEnabledDefault) {
5712 return; // heuristic is disabled; we would get O(n^2) and fail.
5713 }
5714
5715 for (int i = 0; i < num_cases; ++i) {
5716 XML_Parser parser = XML_ParserCreate(NULL);
5717 assert_true(parser != NULL);
5718 enum XML_Status status;
5719 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5720
5721 // parse the start text
5722 g_bytesScanned = 0;
5723 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5724 (int)strlen(text[i].pre), XML_FALSE);
5725 if (status != XML_STATUS_OK) {
5726 xml_failure(parser);
5727 }
5728
5729 // parse lots of 'a', failing the test early if it takes too long
5730 unsigned past_max_count = 0;
5731 for (int f = 0; f < fillcount; ++f) {
5732 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5733 if (status != XML_STATUS_OK) {
5734 xml_failure(parser);
5735 }
5736 if (g_bytesScanned > max_scanned) {
5737 // We're not done, and have already passed the limit -- the test will
5738 // definitely fail. This block allows us to save time by failing early.
5739 const unsigned pushed
5740 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5741 fprintf(
5742 stderr,
5743 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5744 f + 1, fillcount, pushed, g_bytesScanned,
5745 g_bytesScanned / (double)pushed, max_scanned, max_factor);
5746 past_max_count++;
5747 // We are failing, but allow a few log prints first. If we don't reach
5748 // a count of five, the test will fail after the loop instead.
5749 assert_true(past_max_count < 5);
5750 }
5751 }
5752
5753 // parse the end text
5754 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5755 (int)strlen(text[i].post), XML_TRUE);
5756 if (status != XML_STATUS_OK) {
5757 xml_failure(parser);
5758 }
5759
5760 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5761 if (g_bytesScanned > max_scanned) {
5762 fprintf(
5763 stderr,
5764 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5765 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5766 max_factor);
5767 fail("scanned too many bytes");
5768 }
5769
5770 XML_ParserFree(parser);
5771 }
5772 }
5773 END_TEST
5774
START_TEST(test_set_reparse_deferral)5775 START_TEST(test_set_reparse_deferral) {
5776 const char *const pre = "<d>";
5777 const char *const start = "<x attr='";
5778 const char *const end = "'></x>";
5779 char eeeeee[100];
5780 const int fillsize = (int)sizeof(eeeeee);
5781 memset(eeeeee, 'e', fillsize);
5782
5783 for (int enabled = 0; enabled <= 1; enabled += 1) {
5784 set_subtest("deferral=%d", enabled);
5785
5786 XML_Parser parser = XML_ParserCreate(NULL);
5787 assert_true(parser != NULL);
5788 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5789 // pre-grow the buffer to avoid reparsing due to almost-fullness
5790 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5791
5792 CharData storage;
5793 CharData_Init(&storage);
5794 XML_SetUserData(parser, &storage);
5795 XML_SetStartElementHandler(parser, start_element_event_handler);
5796
5797 enum XML_Status status;
5798 // parse the start text
5799 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5800 if (status != XML_STATUS_OK) {
5801 xml_failure(parser);
5802 }
5803 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5804
5805 // ..and the start of the token
5806 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5807 if (status != XML_STATUS_OK) {
5808 xml_failure(parser);
5809 }
5810 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5811
5812 // try to parse lots of 'e', but the token isn't finished
5813 for (int c = 0; c < 100; ++c) {
5814 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5815 if (status != XML_STATUS_OK) {
5816 xml_failure(parser);
5817 }
5818 }
5819 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5820
5821 // end the <x> token.
5822 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5823 if (status != XML_STATUS_OK) {
5824 xml_failure(parser);
5825 }
5826
5827 if (enabled) {
5828 // In general, we may need to push more data to trigger a reparse attempt,
5829 // but in this test, the data is constructed to always require it.
5830 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5831 // 2x the token length should suffice; the +1 covers the start and end.
5832 for (int c = 0; c < 101; ++c) {
5833 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5834 if (status != XML_STATUS_OK) {
5835 xml_failure(parser);
5836 }
5837 }
5838 }
5839 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5840
5841 XML_ParserFree(parser);
5842 }
5843 }
5844 END_TEST
5845
/* User data for element_decl_counter(). */
struct element_decl_data {
  XML_Parser parser; /* parser passed to XML_FreeContentModel() per model */
  int count;         /* incremented once per element_decl_counter() call */
};
5850
5851 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5852 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5853 UNUSED_P(name);
5854 struct element_decl_data *testdata = (struct element_decl_data *)userData;
5855 testdata->count += 1;
5856 XML_FreeContentModel(testdata->parser, model);
5857 }
5858
5859 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5860 external_inherited_parser(XML_Parser p, const XML_Char *context,
5861 const XML_Char *base, const XML_Char *systemId,
5862 const XML_Char *publicId) {
5863 UNUSED_P(base);
5864 UNUSED_P(systemId);
5865 UNUSED_P(publicId);
5866 const char *const pre = "<!ELEMENT document ANY>\n";
5867 const char *const start = "<!ELEMENT ";
5868 const char *const end = " ANY>\n";
5869 const char *const post = "<!ELEMENT xyz ANY>\n";
5870 const int enabled = *(int *)XML_GetUserData(p);
5871 char eeeeee[100];
5872 char spaces[100];
5873 const int fillsize = (int)sizeof(eeeeee);
5874 assert_true(fillsize == (int)sizeof(spaces));
5875 memset(eeeeee, 'e', fillsize);
5876 memset(spaces, ' ', fillsize);
5877
5878 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5879 assert_true(parser != NULL);
5880 // pre-grow the buffer to avoid reparsing due to almost-fullness
5881 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5882
5883 struct element_decl_data testdata;
5884 testdata.parser = parser;
5885 testdata.count = 0;
5886 XML_SetUserData(parser, &testdata);
5887 XML_SetElementDeclHandler(parser, element_decl_counter);
5888
5889 enum XML_Status status;
5890 // parse the initial text
5891 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5892 if (status != XML_STATUS_OK) {
5893 xml_failure(parser);
5894 }
5895 assert_true(testdata.count == 1); // first element should be done
5896
5897 // ..and the start of the big token
5898 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5899 if (status != XML_STATUS_OK) {
5900 xml_failure(parser);
5901 }
5902 assert_true(testdata.count == 1); // still just the first one
5903
5904 // try to parse lots of 'e', but the token isn't finished
5905 for (int c = 0; c < 100; ++c) {
5906 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5907 if (status != XML_STATUS_OK) {
5908 xml_failure(parser);
5909 }
5910 }
5911 assert_true(testdata.count == 1); // *still* just the first one
5912
5913 // end the big token.
5914 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5915 if (status != XML_STATUS_OK) {
5916 xml_failure(parser);
5917 }
5918
5919 if (enabled) {
5920 // In general, we may need to push more data to trigger a reparse attempt,
5921 // but in this test, the data is constructed to always require it.
5922 assert_true(testdata.count == 1); // or the test is incorrect
5923 // 2x the token length should suffice; the +1 covers the start and end.
5924 for (int c = 0; c < 101; ++c) {
5925 status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5926 if (status != XML_STATUS_OK) {
5927 xml_failure(parser);
5928 }
5929 }
5930 }
5931 assert_true(testdata.count == 2); // the big token should be done
5932
5933 // parse the final text
5934 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5935 if (status != XML_STATUS_OK) {
5936 xml_failure(parser);
5937 }
5938 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5939
5940 XML_ParserFree(parser);
5941 return XML_STATUS_OK;
5942 }
5943
START_TEST(test_reparse_deferral_is_inherited)5944 START_TEST(test_reparse_deferral_is_inherited) {
5945 const char *const text
5946 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5947 for (int enabled = 0; enabled <= 1; ++enabled) {
5948 set_subtest("deferral=%d", enabled);
5949
5950 XML_Parser parser = XML_ParserCreate(NULL);
5951 assert_true(parser != NULL);
5952 XML_SetUserData(parser, (void *)&enabled);
5953 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5954 // this handler creates a sub-parser and checks that its deferral behavior
5955 // is what we expected, based on the value of `enabled` (in userdata).
5956 XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5957 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5958 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5959 xml_failure(parser);
5960
5961 XML_ParserFree(parser);
5962 }
5963 }
5964 END_TEST
5965
/* XML_SetReparseDeferralEnabled() must return XML_FALSE for a NULL parser,
   whatever value is passed as the second argument. */
START_TEST(test_set_reparse_deferral_on_null_parser) {
  assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
  // Extremes of int, narrowed to XML_Bool by the explicit casts.
  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
              == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
              == XML_FALSE);
}
END_TEST
5977
START_TEST(test_set_reparse_deferral_on_the_fly)5978 START_TEST(test_set_reparse_deferral_on_the_fly) {
5979 const char *const pre = "<d><x attr='";
5980 const char *const end = "'></x>";
5981 char iiiiii[100];
5982 const int fillsize = (int)sizeof(iiiiii);
5983 memset(iiiiii, 'i', fillsize);
5984
5985 XML_Parser parser = XML_ParserCreate(NULL);
5986 assert_true(parser != NULL);
5987 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5988
5989 CharData storage;
5990 CharData_Init(&storage);
5991 XML_SetUserData(parser, &storage);
5992 XML_SetStartElementHandler(parser, start_element_event_handler);
5993
5994 enum XML_Status status;
5995 // parse the start text
5996 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5997 if (status != XML_STATUS_OK) {
5998 xml_failure(parser);
5999 }
6000 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6001
6002 // try to parse some 'i', but the token isn't finished
6003 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
6004 if (status != XML_STATUS_OK) {
6005 xml_failure(parser);
6006 }
6007 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6008
6009 // end the <x> token.
6010 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6011 if (status != XML_STATUS_OK) {
6012 xml_failure(parser);
6013 }
6014 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
6015
6016 // now change the heuristic setting and add *no* data
6017 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
6018 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
6019 status = XML_Parse(parser, "", 0, XML_FALSE);
6020 if (status != XML_STATUS_OK) {
6021 xml_failure(parser);
6022 }
6023 CharData_CheckXMLChars(&storage, XCS("dx"));
6024
6025 XML_ParserFree(parser);
6026 }
6027 END_TEST
6028
START_TEST(test_set_bad_reparse_option)6029 START_TEST(test_set_bad_reparse_option) {
6030 XML_Parser parser = XML_ParserCreate(NULL);
6031 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
6032 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
6033 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
6034 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
6035 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
6036 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
6037 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
6038 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
6039 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
6040 XML_ParserFree(parser);
6041 }
6042 END_TEST
6043
/* Bookkeeping for the counting allocator wrappers below: the sum of every
   size requested through them, and the largest single size requested. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper: records `size` in the two counters above, then forwards
   the request to realloc() and returns its result unchanged. */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc()-shaped wrapper: a fresh allocation is counted and served as a
   counting_realloc() of a NULL pointer. */
static void *
counting_malloc(size_t size) {
  void *const no_previous_block = NULL;
  return counting_realloc(no_previous_block, size);
}
6060
START_TEST(test_bypass_heuristic_when_close_to_bufsize)6061 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
6062 if (g_chunkSize != 0) {
6063 // this test does not use SINGLE_BYTES, because it depends on very precise
6064 // buffer fills.
6065 return;
6066 }
6067 if (! g_reparseDeferralEnabledDefault) {
6068 return; // this test is irrelevant when the deferral heuristic is disabled.
6069 }
6070
6071 const int document_length = 65536;
6072 char *const document = (char *)malloc(document_length);
6073 assert_true(document != NULL);
6074
6075 const XML_Memory_Handling_Suite memfuncs = {
6076 counting_malloc,
6077 counting_realloc,
6078 free,
6079 };
6080
6081 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
6082 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
6083 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
6084
6085 for (const int *leading = leading_list; *leading >= 0; leading++) {
6086 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
6087 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
6088 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
6089 *fillsize);
6090 // start by checking that the test looks reasonably valid
6091 assert_true(*leading + *bigtoken <= document_length);
6092
6093 // put 'x' everywhere; some will be overwritten by elements.
6094 memset(document, 'x', document_length);
6095 // maybe add an initial tag
6096 if (*leading) {
6097 assert_true(*leading >= 3); // or the test case is invalid
6098 memcpy(document, "<a>", 3);
6099 }
6100 // add the large token
6101 document[*leading + 0] = '<';
6102 document[*leading + 1] = 'b';
6103 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
6104 document[*leading + *bigtoken - 1] = '>';
6105
6106 // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
6107 const int expected_elem_total = 1 + (*leading ? 1 : 0);
6108
6109 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
6110 assert_true(parser != NULL);
6111
6112 CharData storage;
6113 CharData_Init(&storage);
6114 XML_SetUserData(parser, &storage);
6115 XML_SetStartElementHandler(parser, start_element_event_handler);
6116
6117 g_biggestAlloc = 0;
6118 g_totalAlloc = 0;
6119 int offset = 0;
6120 // fill data until the big token is covered (but not necessarily parsed)
6121 while (offset < *leading + *bigtoken) {
6122 assert_true(offset + *fillsize <= document_length);
6123 const enum XML_Status status
6124 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6125 if (status != XML_STATUS_OK) {
6126 xml_failure(parser);
6127 }
6128 offset += *fillsize;
6129 }
6130 // Now, check that we've had a buffer allocation that could fit the
6131 // context bytes and our big token. In order to detect a special case,
6132 // we need to know how many bytes of our big token were included in the
6133 // first push that contained _any_ bytes of the big token:
6134 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
6135 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
6136 // Special case: we aren't saving any context, and the whole big token
6137 // was covered by a single fill, so Expat may have parsed directly
6138 // from our input pointer, without allocating an internal buffer.
6139 } else if (*leading < XML_CONTEXT_BYTES) {
6140 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
6141 } else {
6142 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
6143 }
6144 // fill data until the big token is actually parsed
6145 while (storage.count < expected_elem_total) {
6146 const size_t alloc_before = g_totalAlloc;
6147 assert_true(offset + *fillsize <= document_length);
6148 const enum XML_Status status
6149 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6150 if (status != XML_STATUS_OK) {
6151 xml_failure(parser);
6152 }
6153 offset += *fillsize;
6154 // since all the bytes of the big token are already in the buffer,
6155 // the bufsize ceiling should make us finish its parsing without any
6156 // further buffer allocations. We assume that there will be no other
6157 // large allocations in this test.
6158 assert_true(g_totalAlloc - alloc_before < 4096);
6159 }
6160 // test-the-test: was our alloc even called?
6161 assert_true(g_totalAlloc > 0);
6162 // test-the-test: there shouldn't be any extra start elements
6163 assert_true(storage.count == expected_elem_total);
6164
6165 XML_ParserFree(parser);
6166 }
6167 }
6168 }
6169 free(document);
6170 }
6171 END_TEST
6172
START_TEST(test_varying_buffer_fills)6173 START_TEST(test_varying_buffer_fills) {
6174 const int KiB = 1024;
6175 const int MiB = 1024 * KiB;
6176 const int document_length = 16 * MiB;
6177 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
6178
6179 if (g_chunkSize != 0) {
6180 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
6181 }
6182
6183 char *const document = (char *)malloc(document_length);
6184 assert_true(document != NULL);
6185 memset(document, 'x', document_length);
6186 document[0] = '<';
6187 document[1] = 't';
6188 memset(&document[2], ' ', big - 2); // a very spacy token
6189 document[big - 1] = '>';
6190
6191 // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
6192 // When reparse deferral is enabled, the final (negated) value is the expected
6193 // maximum number of bytes scanned in parse attempts.
6194 const int testcases[][30] = {
6195 {8 * MiB, -8 * MiB},
6196 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
6197 // zero-size fills shouldn't trigger the bypass
6198 {4 * MiB, 0, 4 * MiB, -12 * MiB},
6199 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
6200 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
6201 // try to hit the buffer ceiling only once (at the end)
6202 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
6203 // try to hit the same buffer ceiling multiple times
6204 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
6205
6206 // try to hit every ceiling, by always landing 1K shy of the buffer size
6207 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
6208 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
6209
6210 // try to avoid every ceiling, by always landing 1B past the buffer size
6211 // the normal 2x heuristic threshold still forces parse attempts.
6212 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6213 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6214 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6215 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6216 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6217 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6218 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6219 -(10 * MiB + 682 * KiB + 7)},
6220 // try to avoid every ceiling again, except on our last fill.
6221 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6222 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6223 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6224 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6225 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6226 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6227 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6228 -(10 * MiB + 682 * KiB + 6)},
6229
6230 // try to hit ceilings on the way multiple times
6231 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6232 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6233 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
6234 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
6235 // we'll make a parse attempt at every parse call
6236 -(45 * MiB + 12)},
6237 };
6238 const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6239 for (int test_i = 0; test_i < testcount; test_i++) {
6240 const int *fillsize = testcases[test_i];
6241 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6242 fillsize[2], fillsize[3]);
6243 XML_Parser parser = XML_ParserCreate(NULL);
6244 assert_true(parser != NULL);
6245
6246 CharData storage;
6247 CharData_Init(&storage);
6248 XML_SetUserData(parser, &storage);
6249 XML_SetStartElementHandler(parser, start_element_event_handler);
6250
6251 g_bytesScanned = 0;
6252 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6253 int offset = 0;
6254 while (*fillsize >= 0) {
6255 assert_true(offset + *fillsize <= document_length); // or test is invalid
6256 const enum XML_Status status
6257 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6258 if (status != XML_STATUS_OK) {
6259 xml_failure(parser);
6260 }
6261 offset += *fillsize;
6262 fillsize++;
6263 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6264 worstcase_bytes += offset; // we might've tried to parse all pending bytes
6265 }
6266 assert_true(storage.count == 1); // the big token should've been parsed
6267 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6268 if (g_reparseDeferralEnabledDefault) {
6269 // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6270 const unsigned max_bytes_scanned = -*fillsize;
6271 if (g_bytesScanned > max_bytes_scanned) {
6272 fprintf(stderr,
6273 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6274 g_bytesScanned, max_bytes_scanned);
6275 fail("too many bytes scanned in parse attempts");
6276 }
6277 }
6278 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6279
6280 XML_ParserFree(parser);
6281 }
6282 free(document);
6283 }
6284 END_TEST
6285
START_TEST(test_empty_ext_param_entity_in_value)6286 START_TEST(test_empty_ext_param_entity_in_value) {
6287 const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>";
6288 ExtOption options[] = {
6289 {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">"
6290 "<!ENTITY ge \"%pe;\">"},
6291 {XCS("empty"), ""},
6292 {NULL, NULL},
6293 };
6294
6295 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6296 XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner);
6297 XML_SetUserData(g_parser, options);
6298 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6299 == XML_STATUS_ERROR)
6300 xml_failure(g_parser);
6301 }
6302 END_TEST
6303
6304 void
make_basic_test_case(Suite * s)6305 make_basic_test_case(Suite *s) {
6306 TCase *tc_basic = tcase_create("basic tests");
6307
6308 suite_add_tcase(s, tc_basic);
6309 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
6310
6311 tcase_add_test(tc_basic, test_nul_byte);
6312 tcase_add_test(tc_basic, test_u0000_char);
6313 tcase_add_test(tc_basic, test_siphash_self);
6314 tcase_add_test(tc_basic, test_siphash_spec);
6315 tcase_add_test(tc_basic, test_bom_utf8);
6316 tcase_add_test(tc_basic, test_bom_utf16_be);
6317 tcase_add_test(tc_basic, test_bom_utf16_le);
6318 tcase_add_test(tc_basic, test_nobom_utf16_le);
6319 tcase_add_test(tc_basic, test_hash_collision);
6320 tcase_add_test(tc_basic, test_hash_salt_setter);
6321 tcase_add_test(tc_basic, test_illegal_utf8);
6322 tcase_add_test(tc_basic, test_utf8_auto_align);
6323 tcase_add_test(tc_basic, test_utf16);
6324 tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
6325 tcase_add_test(tc_basic, test_not_utf16);
6326 tcase_add_test(tc_basic, test_bad_encoding);
6327 tcase_add_test(tc_basic, test_latin1_umlauts);
6328 tcase_add_test(tc_basic, test_long_utf8_character);
6329 tcase_add_test(tc_basic, test_long_latin1_attribute);
6330 tcase_add_test(tc_basic, test_long_ascii_attribute);
6331 /* Regression test for SF bug #491986. */
6332 tcase_add_test(tc_basic, test_danish_latin1);
6333 /* Regression test for SF bug #514281. */
6334 tcase_add_test(tc_basic, test_french_charref_hexidecimal);
6335 tcase_add_test(tc_basic, test_french_charref_decimal);
6336 tcase_add_test(tc_basic, test_french_latin1);
6337 tcase_add_test(tc_basic, test_french_utf8);
6338 tcase_add_test(tc_basic, test_utf8_false_rejection);
6339 tcase_add_test(tc_basic, test_line_number_after_parse);
6340 tcase_add_test(tc_basic, test_column_number_after_parse);
6341 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
6342 tcase_add_test(tc_basic, test_line_number_after_error);
6343 tcase_add_test(tc_basic, test_column_number_after_error);
6344 tcase_add_test(tc_basic, test_really_long_lines);
6345 tcase_add_test(tc_basic, test_really_long_encoded_lines);
6346 tcase_add_test(tc_basic, test_end_element_events);
6347 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
6348 tcase_add_test(tc_basic, test_attr_whitespace_normalization);
6349 tcase_add_test(tc_basic, test_xmldecl_misplaced);
6350 tcase_add_test(tc_basic, test_xmldecl_invalid);
6351 tcase_add_test(tc_basic, test_xmldecl_missing_attr);
6352 tcase_add_test(tc_basic, test_xmldecl_missing_value);
6353 tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
6354 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
6355 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
6356 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
6357 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
6358 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
6359 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
6360 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
6361 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
6362 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
6363 tcase_add_test(tc_basic,
6364 test_wfc_undeclared_entity_with_external_subset_standalone);
6365 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
6366 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
6367 tcase_add_test(tc_basic, test_not_standalone_handler_reject);
6368 tcase_add_test(tc_basic, test_not_standalone_handler_accept);
6369 tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
6370 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
6371 tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
6372 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
6373 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
6374 tcase_add_test(tc_basic, test_dtd_attr_handling);
6375 tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
6376 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
6377 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
6378 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
6379 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
6380 tcase_add_test(tc_basic, test_good_cdata_ascii);
6381 tcase_add_test(tc_basic, test_good_cdata_utf16);
6382 tcase_add_test(tc_basic, test_good_cdata_utf16_le);
6383 tcase_add_test(tc_basic, test_long_cdata_utf16);
6384 tcase_add_test(tc_basic, test_multichar_cdata_utf16);
6385 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
6386 tcase_add_test(tc_basic, test_bad_cdata);
6387 tcase_add_test(tc_basic, test_bad_cdata_utf16);
6388 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
6389 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
6390 tcase_add_test(tc_basic, test_memory_allocation);
6391 tcase_add_test__if_xml_ge(tc_basic, test_default_current);
6392 tcase_add_test(tc_basic, test_dtd_elements);
6393 tcase_add_test(tc_basic, test_dtd_elements_nesting);
6394 tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
6395 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
6396 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
6397 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
6398 tcase_add_test__ifdef_xml_dtd(tc_basic,
6399 test_foreign_dtd_without_external_subset);
6400 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6401 tcase_add_test(tc_basic, test_set_base);
6402 tcase_add_test(tc_basic, test_attributes);
6403 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6404 tcase_add_test(tc_basic, test_resume_invalid_parse);
6405 tcase_add_test(tc_basic, test_resume_resuspended);
6406 tcase_add_test(tc_basic, test_cdata_default);
6407 tcase_add_test(tc_basic, test_subordinate_reset);
6408 tcase_add_test(tc_basic, test_subordinate_suspend);
6409 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6410 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6411 tcase_add_test__ifdef_xml_dtd(tc_basic,
6412 test_ext_entity_invalid_suspended_parse);
6413 tcase_add_test(tc_basic, test_explicit_encoding);
6414 tcase_add_test(tc_basic, test_trailing_cr);
6415 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6416 tcase_add_test(tc_basic, test_trailing_rsqb);
6417 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6418 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6419 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6420 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6421 tcase_add_test(tc_basic, test_empty_parse);
6422 tcase_add_test(tc_basic, test_negative_len_parse);
6423 tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6424 tcase_add_test(tc_basic, test_get_buffer_1);
6425 tcase_add_test(tc_basic, test_get_buffer_2);
6426 #if XML_CONTEXT_BYTES > 0
6427 tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6428 #endif
6429 tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6430 tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6431 tcase_add_test(tc_basic, test_byte_info_at_end);
6432 tcase_add_test(tc_basic, test_byte_info_at_error);
6433 tcase_add_test(tc_basic, test_byte_info_at_cdata);
6434 tcase_add_test(tc_basic, test_predefined_entities);
6435 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6436 tcase_add_test(tc_basic, test_not_predefined_entities);
6437 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6438 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6439 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6440 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6441 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6442 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6443 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6444 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6445 tcase_add_test(tc_basic, test_bad_public_doctype);
6446 tcase_add_test(tc_basic, test_attribute_enum_value);
6447 tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6448 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6449 tcase_add_test(tc_basic, test_public_notation_no_sysid);
6450 tcase_add_test(tc_basic, test_nested_groups);
6451 tcase_add_test(tc_basic, test_group_choice);
6452 tcase_add_test(tc_basic, test_standalone_parameter_entity);
6453 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6454 tcase_add_test__ifdef_xml_dtd(tc_basic,
6455 test_recursive_external_parameter_entity);
6456 tcase_add_test__ifdef_xml_dtd(tc_basic,
6457 test_recursive_external_parameter_entity_2);
6458 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6459 tcase_add_test(tc_basic, test_suspend_xdecl);
6460 tcase_add_test(tc_basic, test_abort_epilog);
6461 tcase_add_test(tc_basic, test_abort_epilog_2);
6462 tcase_add_test(tc_basic, test_suspend_epilog);
6463 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6464 tcase_add_test(tc_basic, test_unfinished_epilog);
6465 tcase_add_test(tc_basic, test_partial_char_in_epilog);
6466 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6467 tcase_add_test__ifdef_xml_dtd(tc_basic,
6468 test_suspend_resume_internal_entity_issue_629);
6469 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6470 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6471 tcase_add_test(tc_basic, test_restart_on_error);
6472 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6473 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6474 tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6475 tcase_add_test(tc_basic, test_standalone_internal_entity);
6476 tcase_add_test(tc_basic, test_skipped_external_entity);
6477 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6478 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6479 tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6480 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6481 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6482 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6483 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6484 tcase_add_test(tc_basic, test_pi_handled_in_default);
6485 tcase_add_test(tc_basic, test_comment_handled_in_default);
6486 tcase_add_test(tc_basic, test_pi_yml);
6487 tcase_add_test(tc_basic, test_pi_xnl);
6488 tcase_add_test(tc_basic, test_pi_xmm);
6489 tcase_add_test(tc_basic, test_utf16_pi);
6490 tcase_add_test(tc_basic, test_utf16_be_pi);
6491 tcase_add_test(tc_basic, test_utf16_be_comment);
6492 tcase_add_test(tc_basic, test_utf16_le_comment);
6493 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6494 tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6495 tcase_add_test(tc_basic, test_unknown_encoding_success);
6496 tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6497 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6498 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6499 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6500 tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6501 tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6502 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6503 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6504 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6505 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6506 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6507 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6508 tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary);
6509 tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary);
6510 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6511 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6512 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6513 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6514 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6515 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6516 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6517 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6518 tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6519 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6520 tcase_add_test(tc_basic, test_utf8_in_start_tags);
6521 tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6522 tcase_add_test(tc_basic, test_utf16_attribute);
6523 tcase_add_test(tc_basic, test_utf16_second_attr);
6524 tcase_add_test(tc_basic, test_attr_after_solidus);
6525 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6526 tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6527 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6528 tcase_add_test(tc_basic, test_bad_doctype);
6529 tcase_add_test(tc_basic, test_bad_doctype_utf8);
6530 tcase_add_test(tc_basic, test_bad_doctype_utf16);
6531 tcase_add_test(tc_basic, test_bad_doctype_plus);
6532 tcase_add_test(tc_basic, test_bad_doctype_star);
6533 tcase_add_test(tc_basic, test_bad_doctype_query);
6534 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6535 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6536 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6537 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6538 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6539 tcase_add_test(tc_basic, test_short_doctype);
6540 tcase_add_test(tc_basic, test_short_doctype_2);
6541 tcase_add_test(tc_basic, test_short_doctype_3);
6542 tcase_add_test(tc_basic, test_long_doctype);
6543 tcase_add_test(tc_basic, test_bad_entity);
6544 tcase_add_test(tc_basic, test_bad_entity_2);
6545 tcase_add_test(tc_basic, test_bad_entity_3);
6546 tcase_add_test(tc_basic, test_bad_entity_4);
6547 tcase_add_test(tc_basic, test_bad_notation);
6548 tcase_add_test(tc_basic, test_default_doctype_handler);
6549 tcase_add_test(tc_basic, test_empty_element_abort);
6550 tcase_add_test__ifdef_xml_dtd(tc_basic,
6551 test_pool_integrity_with_unfinished_attr);
6552 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value);
6553 tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
6554 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
6555 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
6556 tcase_add_test__if_xml_ge(tc_basic,
6557 test_deep_nested_entity_delayed_interpretation);
6558 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6559 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
6560 tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6561 tcase_add_test(tc_basic, test_set_reparse_deferral);
6562 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6563 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6564 tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6565 tcase_add_test(tc_basic, test_set_bad_reparse_option);
6566 tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6567 tcase_add_test(tc_basic, test_varying_buffer_fills);
6568 }
6569