xref: /freebsd/contrib/expat/tests/basic_tests.c (revision 627b778d9e6b603a44a010d22d823ca7c392b363)
1 /* Tests in the "basic" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22    Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23    Licensed under the MIT license:
24 
25    Permission is  hereby granted,  free of charge,  to any  person obtaining
26    a  copy  of  this  software   and  associated  documentation  files  (the
27    "Software"),  to  deal in  the  Software  without restriction,  including
28    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
29    distribute, sublicense, and/or sell copies of the Software, and to permit
30    persons  to whom  the Software  is  furnished to  do so,  subject to  the
31    following conditions:
32 
33    The above copyright  notice and this permission notice  shall be included
34    in all copies or substantial portions of the Software.
35 
36    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
37    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
38    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
39    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
40    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
41    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
42    USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44 
45 #if defined(NDEBUG)
46 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
47 #endif
48 
49 #include <assert.h>
50 
51 #include <stdio.h>
52 #include <string.h>
53 #include <time.h>
54 
55 #if ! defined(__cplusplus)
56 #  include <stdbool.h>
57 #endif
58 
59 #include "expat_config.h"
60 
61 #include "expat.h"
62 #include "internal.h"
63 #include "minicheck.h"
64 #include "structdata.h"
65 #include "common.h"
66 #include "dummy.h"
67 #include "handlers.h"
68 #include "siphash.h"
69 #include "basic_tests.h"
70 
71 static void
basic_setup(void)72 basic_setup(void) {
73   g_parser = XML_ParserCreate(NULL);
74   if (g_parser == NULL)
75     fail("Parser not created.");
76 }
77 
78 /*
79  * Character & encoding tests.
80  */
81 
START_TEST(test_nul_byte)82 START_TEST(test_nul_byte) {
83   char text[] = "<doc>\0</doc>";
84 
85   /* test that a NUL byte (in US-ASCII data) is an error */
86   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
87       == XML_STATUS_OK)
88     fail("Parser did not report error on NUL-byte.");
89   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
90     xml_failure(g_parser);
91 }
92 END_TEST
93 
START_TEST(test_u0000_char)94 START_TEST(test_u0000_char) {
95   /* test that a NUL byte (in US-ASCII data) is an error */
96   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
97                  "Parser did not report error on NUL-byte.");
98 }
99 END_TEST
100 
START_TEST(test_siphash_self)101 START_TEST(test_siphash_self) {
102   if (! sip24_valid())
103     fail("SipHash self-test failed");
104 }
105 END_TEST
106 
START_TEST(test_siphash_spec)107 START_TEST(test_siphash_spec) {
108   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
109   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
110                          "\x0a\x0b\x0c\x0d\x0e";
111   const size_t len = sizeof(message) - 1;
112   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
113   struct siphash state;
114   struct sipkey key;
115 
116   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
117                   "\x0a\x0b\x0c\x0d\x0e\x0f");
118   sip24_init(&state, &key);
119 
120   /* Cover spread across calls */
121   sip24_update(&state, message, 4);
122   sip24_update(&state, message + 4, len - 4);
123 
124   /* Cover null length */
125   sip24_update(&state, message, 0);
126 
127   if (sip24_final(&state) != expected)
128     fail("sip24_final failed spec test\n");
129 
130   /* Cover wrapper */
131   if (siphash24(message, len, &key) != expected)
132     fail("siphash24 failed spec test\n");
133 }
134 END_TEST
135 
START_TEST(test_bom_utf8)136 START_TEST(test_bom_utf8) {
137   /* This test is really just making sure we don't core on a UTF-8 BOM. */
138   const char *text = "\357\273\277<e/>";
139 
140   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
141       == XML_STATUS_ERROR)
142     xml_failure(g_parser);
143 }
144 END_TEST
145 
START_TEST(test_bom_utf16_be)146 START_TEST(test_bom_utf16_be) {
147   char text[] = "\376\377\0<\0e\0/\0>";
148 
149   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
150       == XML_STATUS_ERROR)
151     xml_failure(g_parser);
152 }
153 END_TEST
154 
START_TEST(test_bom_utf16_le)155 START_TEST(test_bom_utf16_le) {
156   char text[] = "\377\376<\0e\0/\0>\0";
157 
158   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
159       == XML_STATUS_ERROR)
160     xml_failure(g_parser);
161 }
162 END_TEST
163 
START_TEST(test_nobom_utf16_le)164 START_TEST(test_nobom_utf16_le) {
165   char text[] = " \0<\0e\0/\0>\0";
166 
167   if (g_chunkSize == 1) {
168     // TODO: with just the first byte, we can't tell the difference between
169     // UTF-16-LE and UTF-8. Avoid the failure for now.
170     return;
171   }
172 
173   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
174       == XML_STATUS_ERROR)
175     xml_failure(g_parser);
176 }
177 END_TEST
178 
START_TEST(test_hash_collision)179 START_TEST(test_hash_collision) {
180   /* For full coverage of the lookup routine, we need to ensure a
181    * hash collision even though we can only tell that we have one
182    * through breakpoint debugging or coverage statistics.  The
183    * following will cause a hash collision on machines with a 64-bit
184    * long type; others will have to experiment.  The full coverage
185    * tests invoked from qa.sh usually provide a hash collision, but
186    * not always.  This is an attempt to provide insurance.
187    */
188 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
189   const char *text
190       = "<doc>\n"
191         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
192         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
193         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
194         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
195         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
196         "<d8>This triggers the table growth and collides with b2</d8>\n"
197         "</doc>\n";
198 
199   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
200   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
201       == XML_STATUS_ERROR)
202     xml_failure(g_parser);
203 }
204 END_TEST
205 #undef COLLIDING_HASH_SALT
206 
207 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)208 START_TEST(test_danish_latin1) {
209   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
210                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
211 #ifdef XML_UNICODE
212   const XML_Char *expected
213       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
214 #else
215   const XML_Char *expected
216       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
217 #endif
218   run_character_check(text, expected);
219 }
220 END_TEST
221 
222 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)223 START_TEST(test_french_charref_hexidecimal) {
224   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
225                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
226 #ifdef XML_UNICODE
227   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
228 #else
229   const XML_Char *expected
230       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
231 #endif
232   run_character_check(text, expected);
233 }
234 END_TEST
235 
START_TEST(test_french_charref_decimal)236 START_TEST(test_french_charref_decimal) {
237   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
238                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
239 #ifdef XML_UNICODE
240   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
241 #else
242   const XML_Char *expected
243       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
244 #endif
245   run_character_check(text, expected);
246 }
247 END_TEST
248 
START_TEST(test_french_latin1)249 START_TEST(test_french_latin1) {
250   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
251                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
252 #ifdef XML_UNICODE
253   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
254 #else
255   const XML_Char *expected
256       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
257 #endif
258   run_character_check(text, expected);
259 }
260 END_TEST
261 
START_TEST(test_french_utf8)262 START_TEST(test_french_utf8) {
263   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
264                      "<doc>\xC3\xA9</doc>";
265 #ifdef XML_UNICODE
266   const XML_Char *expected = XCS("\x00e9");
267 #else
268   const XML_Char *expected = XCS("\xC3\xA9");
269 #endif
270   run_character_check(text, expected);
271 }
272 END_TEST
273 
274 /* Regression test for SF bug #600479.
275    XXX There should be a test that exercises all legal XML Unicode
276    characters as PCDATA and attribute value content, and XML Name
277    characters as part of element and attribute names.
278 */
START_TEST(test_utf8_false_rejection)279 START_TEST(test_utf8_false_rejection) {
280   const char *text = "<doc>\xEF\xBA\xBF</doc>";
281 #ifdef XML_UNICODE
282   const XML_Char *expected = XCS("\xfebf");
283 #else
284   const XML_Char *expected = XCS("\xEF\xBA\xBF");
285 #endif
286   run_character_check(text, expected);
287 }
288 END_TEST
289 
290 /* Regression test for SF bug #477667.
291    This test assures that any 8-bit character followed by a 7-bit
292    character will not be mistakenly interpreted as a valid UTF-8
293    sequence.
294 */
START_TEST(test_illegal_utf8)295 START_TEST(test_illegal_utf8) {
296   char text[100];
297   int i;
298 
299   for (i = 128; i <= 255; ++i) {
300     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
301     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
302         == XML_STATUS_OK) {
303       snprintf(text, sizeof(text),
304                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
305                i);
306       fail(text);
307     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
308       xml_failure(g_parser);
309     /* Reset the parser since we use the same parser repeatedly. */
310     XML_ParserReset(g_parser, NULL);
311   }
312 }
313 END_TEST
314 
315 /* Examples, not masks: */
316 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
317 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
318 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
319 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
320 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
321 
START_TEST(test_utf8_auto_align)322 START_TEST(test_utf8_auto_align) {
323   struct TestCase {
324     ptrdiff_t expectedMovementInChars;
325     const char *input;
326   };
327 
328   struct TestCase cases[] = {
329       {00, ""},
330 
331       {00, UTF8_LEAD_1},
332 
333       {-1, UTF8_LEAD_2},
334       {00, UTF8_LEAD_2 UTF8_FOLLOW},
335 
336       {-1, UTF8_LEAD_3},
337       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
338       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
339 
340       {-1, UTF8_LEAD_4},
341       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
342       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
343       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
344   };
345 
346   size_t i = 0;
347   bool success = true;
348   for (; i < sizeof(cases) / sizeof(*cases); i++) {
349     const char *fromLim = cases[i].input + strlen(cases[i].input);
350     const char *const fromLimInitially = fromLim;
351     ptrdiff_t actualMovementInChars;
352 
353     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
354 
355     actualMovementInChars = (fromLim - fromLimInitially);
356     if (actualMovementInChars != cases[i].expectedMovementInChars) {
357       size_t j = 0;
358       success = false;
359       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
360              ", actually moved by %2d chars: \"",
361              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
362              (int)actualMovementInChars);
363       for (; j < strlen(cases[i].input); j++) {
364         printf("\\x%02x", (unsigned char)cases[i].input[j]);
365       }
366       printf("\"\n");
367     }
368   }
369 
370   if (! success) {
371     fail("UTF-8 auto-alignment is not bullet-proof\n");
372   }
373 }
374 END_TEST
375 
START_TEST(test_utf16)376 START_TEST(test_utf16) {
377   /* <?xml version="1.0" encoding="UTF-16"?>
378    *  <doc a='123'>some {A} text</doc>
379    *
380    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
381    */
382   char text[]
383       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
384         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
385         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
386         "\000'\000?\000>\000\n"
387         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
388         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
389         "<\000/\000d\000o\000c\000>";
390 #ifdef XML_UNICODE
391   const XML_Char *expected = XCS("some \xff21 text");
392 #else
393   const XML_Char *expected = XCS("some \357\274\241 text");
394 #endif
395   CharData storage;
396 
397   CharData_Init(&storage);
398   XML_SetUserData(g_parser, &storage);
399   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
400   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
401       == XML_STATUS_ERROR)
402     xml_failure(g_parser);
403   CharData_CheckXMLChars(&storage, expected);
404 }
405 END_TEST
406 
START_TEST(test_utf16_le_epilog_newline)407 START_TEST(test_utf16_le_epilog_newline) {
408   unsigned int first_chunk_bytes = 17;
409   char text[] = "\xFF\xFE"                  /* BOM */
410                 "<\000e\000/\000>\000"      /* document element */
411                 "\r\000\n\000\r\000\n\000"; /* epilog */
412 
413   if (first_chunk_bytes >= sizeof(text) - 1)
414     fail("bad value of first_chunk_bytes");
415   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE)
416       == XML_STATUS_ERROR)
417     xml_failure(g_parser);
418   else {
419     enum XML_Status rc;
420     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
421                                  (int)(sizeof(text) - first_chunk_bytes - 1),
422                                  XML_TRUE);
423     if (rc == XML_STATUS_ERROR)
424       xml_failure(g_parser);
425   }
426 }
427 END_TEST
428 
429 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)430 START_TEST(test_not_utf16) {
431   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
432                      "<doc>Hi</doc>";
433 
434   /* Use a handler to provoke the appropriate code paths */
435   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
436   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
437                  "UTF-16 declared in UTF-8 not faulted");
438 }
439 END_TEST
440 
441 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)442 START_TEST(test_bad_encoding) {
443   const char *text = "<doc>Hi</doc>";
444 
445   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
446     fail("XML_SetEncoding failed");
447   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
448                  "Unknown encoding not faulted");
449 }
450 END_TEST
451 
452 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)453 START_TEST(test_latin1_umlauts) {
454   const char *text
455       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
456         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
457         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
458 #ifdef XML_UNICODE
459   /* Expected results in UTF-16 */
460   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
461       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
462 #else
463   /* Expected results in UTF-8 */
464   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
465       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
466 #endif
467 
468   run_character_check(text, expected);
469   XML_ParserReset(g_parser, NULL);
470   run_attribute_check(text, expected);
471   /* Repeat with a default handler */
472   XML_ParserReset(g_parser, NULL);
473   XML_SetDefaultHandler(g_parser, dummy_default_handler);
474   run_character_check(text, expected);
475   XML_ParserReset(g_parser, NULL);
476   XML_SetDefaultHandler(g_parser, dummy_default_handler);
477   run_attribute_check(text, expected);
478 }
479 END_TEST
480 
481 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)482 START_TEST(test_long_utf8_character) {
483   const char *text
484       = "<?xml version='1.0' encoding='utf-8'?>\n"
485         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
486         "<do\xf0\x90\x80\x80/>";
487   expect_failure(text, XML_ERROR_INVALID_TOKEN,
488                  "4-byte UTF-8 character in element name not faulted");
489 }
490 END_TEST
491 
492 /* Test that a long latin-1 attribute (too long to convert in one go)
493  * is correctly converted
494  */
START_TEST(test_long_latin1_attribute)495 START_TEST(test_long_latin1_attribute) {
496   const char *text
497       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
498         "<doc att='"
499         /* 64 characters per line */
500         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
515         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
516         /* Last character splits across a buffer boundary */
517         "\xe4'>\n</doc>";
518 
519   const XML_Char *expected =
520       /* 64 characters per line */
521       /* clang-format off */
522         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
537         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
538   /* clang-format on */
539 #ifdef XML_UNICODE
540                                                   XCS("\x00e4");
541 #else
542                                                   XCS("\xc3\xa4");
543 #endif
544 
545   run_attribute_check(text, expected);
546 }
547 END_TEST
548 
549 /* Test that a long ASCII attribute (too long to convert in one go)
550  * is correctly converted
551  */
START_TEST(test_long_ascii_attribute)552 START_TEST(test_long_ascii_attribute) {
553   const char *text
554       = "<?xml version='1.0' encoding='us-ascii'?>\n"
555         "<doc att='"
556         /* 64 characters per line */
557         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
573         "01234'>\n</doc>";
574   const XML_Char *expected =
575       /* 64 characters per line */
576       /* clang-format off */
577         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
593         XCS("01234");
594   /* clang-format on */
595 
596   run_attribute_check(text, expected);
597 }
598 END_TEST
599 
600 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)601 START_TEST(test_line_number_after_parse) {
602   const char *text = "<tag>\n"
603                      "\n"
604                      "\n</tag>";
605   XML_Size lineno;
606 
607   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
608       == XML_STATUS_ERROR)
609     xml_failure(g_parser);
610   lineno = XML_GetCurrentLineNumber(g_parser);
611   if (lineno != 4) {
612     char buffer[100];
613     snprintf(buffer, sizeof(buffer),
614              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
615     fail(buffer);
616   }
617 }
618 END_TEST
619 
620 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)621 START_TEST(test_column_number_after_parse) {
622   const char *text = "<tag></tag>";
623   XML_Size colno;
624 
625   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
626       == XML_STATUS_ERROR)
627     xml_failure(g_parser);
628   colno = XML_GetCurrentColumnNumber(g_parser);
629   if (colno != 11) {
630     char buffer[100];
631     snprintf(buffer, sizeof(buffer),
632              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
633     fail(buffer);
634   }
635 }
636 END_TEST
637 
638 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)639 START_TEST(test_line_and_column_numbers_inside_handlers) {
640   const char *text = "<a>\n"      /* Unix end-of-line */
641                      "  <b>\r\n"  /* Windows end-of-line */
642                      "    <c/>\r" /* Mac OS end-of-line */
643                      "  </b>\n"
644                      "  <d>\n"
645                      "    <f/>\n"
646                      "  </d>\n"
647                      "</a>";
648   const StructDataEntry expected[]
649       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
650          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
651          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
652          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
653          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
654   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
655   StructData storage;
656 
657   StructData_Init(&storage);
658   XML_SetUserData(g_parser, &storage);
659   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
660   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
661   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
662       == XML_STATUS_ERROR)
663     xml_failure(g_parser);
664 
665   StructData_CheckItems(&storage, expected, expected_count);
666   StructData_Dispose(&storage);
667 }
668 END_TEST
669 
670 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)671 START_TEST(test_line_number_after_error) {
672   const char *text = "<a>\n"
673                      "  <b>\n"
674                      "  </a>"; /* missing </b> */
675   XML_Size lineno;
676   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
677       != XML_STATUS_ERROR)
678     fail("Expected a parse error");
679 
680   lineno = XML_GetCurrentLineNumber(g_parser);
681   if (lineno != 3) {
682     char buffer[100];
683     snprintf(buffer, sizeof(buffer),
684              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
685     fail(buffer);
686   }
687 }
688 END_TEST
689 
690 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)691 START_TEST(test_column_number_after_error) {
692   const char *text = "<a>\n"
693                      "  <b>\n"
694                      "  </a>"; /* missing </b> */
695   XML_Size colno;
696   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
697       != XML_STATUS_ERROR)
698     fail("Expected a parse error");
699 
700   colno = XML_GetCurrentColumnNumber(g_parser);
701   if (colno != 4) {
702     char buffer[100];
703     snprintf(buffer, sizeof(buffer),
704              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
705     fail(buffer);
706   }
707 }
708 END_TEST
709 
710 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)711 START_TEST(test_really_long_lines) {
712   /* This parses an input line longer than INIT_DATA_BUF_SIZE
713      characters long (defined to be 1024 in xmlparse.c).  We take a
714      really cheesy approach to building the input buffer, because
715      this avoids writing bugs in buffer-filling code.
716   */
717   const char *text
718       = "<e>"
719         /* 64 chars */
720         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
721         /* until we have at least 1024 characters on the line: */
722         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
738         "</e>";
739   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
740       == XML_STATUS_ERROR)
741     xml_failure(g_parser);
742 }
743 END_TEST
744 
745 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)746 START_TEST(test_really_long_encoded_lines) {
747   /* As above, except that we want to provoke an output buffer
748    * overflow with a non-trivial encoding.  For this we need to pass
749    * the whole cdata in one go, not byte-by-byte.
750    */
751   void *buffer;
752   const char *text
753       = "<?xml version='1.0' encoding='iso-8859-1'?>"
754         "<e>"
755         /* 64 chars */
756         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757         /* until we have at least 1024 characters on the line: */
758         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
774         "</e>";
775   int parse_len = (int)strlen(text);
776 
777   /* Need a cdata handler to provoke the code path we want to test */
778   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
779   buffer = XML_GetBuffer(g_parser, parse_len);
780   if (buffer == NULL)
781     fail("Could not allocate parse buffer");
782   assert(buffer != NULL);
783   memcpy(buffer, text, parse_len);
784   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
785     xml_failure(g_parser);
786 }
787 END_TEST
788 
789 /*
790  * Element event tests.
791  */
792 
START_TEST(test_end_element_events)793 START_TEST(test_end_element_events) {
794   const char *text = "<a><b><c/></b><d><f/></d></a>";
795   const XML_Char *expected = XCS("/c/b/f/d/a");
796   CharData storage;
797 
798   CharData_Init(&storage);
799   XML_SetUserData(g_parser, &storage);
800   XML_SetEndElementHandler(g_parser, end_element_event_handler);
801   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
802       == XML_STATUS_ERROR)
803     xml_failure(g_parser);
804   CharData_CheckXMLChars(&storage, expected);
805 }
806 END_TEST
807 
808 /*
809  * Attribute tests.
810  */
811 
812 /* Helper used by the following tests; this checks any "attr" and "refs"
813    attributes to make sure whitespace has been normalized.
814 
815    Return true if whitespace has been normalized in a string, using
816    the rules for attribute value normalization.  The 'is_cdata' flag
817    is needed since CDATA attributes don't need to have multiple
818    whitespace characters collapsed to a single space, while other
819    attribute data types do.  (Section 3.3.3 of the recommendation.)
820 */
821 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)822 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
823   int blanks = 0;
824   int at_start = 1;
825   while (*s) {
826     if (*s == XCS(' '))
827       ++blanks;
828     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
829       return 0;
830     else {
831       if (at_start) {
832         at_start = 0;
833         if (blanks && ! is_cdata)
834           /* illegal leading blanks */
835           return 0;
836       } else if (blanks > 1 && ! is_cdata)
837         return 0;
838       blanks = 0;
839     }
840     ++s;
841   }
842   if (blanks && ! is_cdata)
843     return 0;
844   return 1;
845 }
846 
847 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)848 START_TEST(test_helper_is_whitespace_normalized) {
849   assert(is_whitespace_normalized(XCS("abc"), 0));
850   assert(is_whitespace_normalized(XCS("abc"), 1));
851   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
852   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
853   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
854   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
855   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
856   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
857   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
858   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
859   assert(! is_whitespace_normalized(XCS(" "), 0));
860   assert(is_whitespace_normalized(XCS(" "), 1));
861   assert(! is_whitespace_normalized(XCS("\t"), 0));
862   assert(! is_whitespace_normalized(XCS("\t"), 1));
863   assert(! is_whitespace_normalized(XCS("\n"), 0));
864   assert(! is_whitespace_normalized(XCS("\n"), 1));
865   assert(! is_whitespace_normalized(XCS("\r"), 0));
866   assert(! is_whitespace_normalized(XCS("\r"), 1));
867   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
868 }
869 END_TEST
870 
871 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)872 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
873                                           const XML_Char **atts) {
874   int i;
875   UNUSED_P(userData);
876   UNUSED_P(name);
877   for (i = 0; atts[i] != NULL; i += 2) {
878     const XML_Char *attrname = atts[i];
879     const XML_Char *value = atts[i + 1];
880     if (xcstrcmp(XCS("attr"), attrname) == 0
881         || xcstrcmp(XCS("ents"), attrname) == 0
882         || xcstrcmp(XCS("refs"), attrname) == 0) {
883       if (! is_whitespace_normalized(value, 0)) {
884         char buffer[256];
885         snprintf(buffer, sizeof(buffer),
886                  "attribute value not normalized: %" XML_FMT_STR
887                  "='%" XML_FMT_STR "'",
888                  attrname, value);
889         fail(buffer);
890       }
891     }
892   }
893 }
894 
START_TEST(test_attr_whitespace_normalization)895 START_TEST(test_attr_whitespace_normalization) {
896   const char *text
897       = "<!DOCTYPE doc [\n"
898         "  <!ATTLIST doc\n"
899         "            attr NMTOKENS #REQUIRED\n"
900         "            ents ENTITIES #REQUIRED\n"
901         "            refs IDREFS   #REQUIRED>\n"
902         "]>\n"
903         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
904         "     ents=' ent-1   \t\r\n"
905         "            ent-2  ' >\n"
906         "  <e id='id-1'/>\n"
907         "  <e id='id-2'/>\n"
908         "</doc>";
909 
910   XML_SetStartElementHandler(g_parser,
911                              check_attr_contains_normalized_whitespace);
912   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
913       == XML_STATUS_ERROR)
914     xml_failure(g_parser);
915 }
916 END_TEST
917 
918 /*
919  * XML declaration tests.
920  */
921 
START_TEST(test_xmldecl_misplaced)922 START_TEST(test_xmldecl_misplaced) {
923   expect_failure("\n"
924                  "<?xml version='1.0'?>\n"
925                  "<a/>",
926                  XML_ERROR_MISPLACED_XML_PI,
927                  "failed to report misplaced XML declaration");
928 }
929 END_TEST
930 
START_TEST(test_xmldecl_invalid)931 START_TEST(test_xmldecl_invalid) {
932   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
933                  "Failed to report invalid XML declaration");
934 }
935 END_TEST
936 
START_TEST(test_xmldecl_missing_attr)937 START_TEST(test_xmldecl_missing_attr) {
938   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
939                  "Failed to report missing XML declaration attribute");
940 }
941 END_TEST
942 
START_TEST(test_xmldecl_missing_value)943 START_TEST(test_xmldecl_missing_value) {
944   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
945                  "<doc/>",
946                  XML_ERROR_XML_DECL,
947                  "Failed to report missing attribute value");
948 }
949 END_TEST
950 
951 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)952 START_TEST(test_unknown_encoding_internal_entity) {
953   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
954                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
955                      "<test a='&foo;'/>";
956 
957   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
958   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
959       == XML_STATUS_ERROR)
960     xml_failure(g_parser);
961 }
962 END_TEST
963 
964 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)965 START_TEST(test_unrecognised_encoding_internal_entity) {
966   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
967                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
968                      "<test a='&foo;'/>";
969 
970   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
971   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
972       != XML_STATUS_ERROR)
973     fail("Unrecognised encoding not rejected");
974 }
975 END_TEST
976 
977 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)978 START_TEST(test_ext_entity_set_encoding) {
979   const char *text = "<!DOCTYPE doc [\n"
980                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
981                      "]>\n"
982                      "<doc>&en;</doc>";
983   ExtTest test_data
984       = {/* This text says it's an unsupported encoding, but it's really
985             UTF-8, which we tell Expat using XML_SetEncoding().
986          */
987          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
988 #ifdef XML_UNICODE
989   const XML_Char *expected = XCS("\x00e9");
990 #else
991   const XML_Char *expected = XCS("\xc3\xa9");
992 #endif
993 
994   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
995   run_ext_character_check(text, &test_data, expected);
996 }
997 END_TEST
998 
999 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)1000 START_TEST(test_ext_entity_no_handler) {
1001   const char *text = "<!DOCTYPE doc [\n"
1002                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1003                      "]>\n"
1004                      "<doc>&en;</doc>";
1005 
1006   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1007   run_character_check(text, XCS(""));
1008 }
1009 END_TEST
1010 
1011 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1012 START_TEST(test_ext_entity_set_bom) {
1013   const char *text = "<!DOCTYPE doc [\n"
1014                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1015                      "]>\n"
1016                      "<doc>&en;</doc>";
1017   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1018                        "<?xml encoding='iso-8859-3'?>"
1019                        "\xC3\xA9",
1020                        XCS("utf-8"), NULL};
1021 #ifdef XML_UNICODE
1022   const XML_Char *expected = XCS("\x00e9");
1023 #else
1024   const XML_Char *expected = XCS("\xc3\xa9");
1025 #endif
1026 
1027   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1028   run_ext_character_check(text, &test_data, expected);
1029 }
1030 END_TEST
1031 
1032 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1033 START_TEST(test_ext_entity_bad_encoding) {
1034   const char *text = "<!DOCTYPE doc [\n"
1035                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1036                      "]>\n"
1037                      "<doc>&en;</doc>";
1038   ExtFaults fault
1039       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1040          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1041 
1042   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1043   XML_SetUserData(g_parser, &fault);
1044   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1045                  "Bad encoding should not have been accepted");
1046 }
1047 END_TEST
1048 
1049 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1050 START_TEST(test_ext_entity_bad_encoding_2) {
1051   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1052                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1053                      "<doc>&entity;</doc>";
1054   ExtFaults fault
1055       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1056          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1057 
1058   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1059   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1060   XML_SetUserData(g_parser, &fault);
1061   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1062                  "Bad encoding not faulted in external entity handler");
1063 }
1064 END_TEST
1065 
1066 /* Test that no error is reported for unknown entities if we don't
1067    read an external subset.  This was fixed in Expat 1.95.5.
1068 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1069 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1070   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1071                      "<doc>&entity;</doc>";
1072 
1073   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1074       == XML_STATUS_ERROR)
1075     xml_failure(g_parser);
1076 }
1077 END_TEST
1078 
1079 /* Test that an error is reported for unknown entities if we don't
1080    have an external subset.
1081 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1082 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1083   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1084                  "Parser did not report undefined entity w/out a DTD.");
1085 }
1086 END_TEST
1087 
1088 /* Test that an error is reported for unknown entities if we don't
1089    read an external subset, but have been declared standalone.
1090 */
START_TEST(test_wfc_undeclared_entity_standalone)1091 START_TEST(test_wfc_undeclared_entity_standalone) {
1092   const char *text
1093       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1094         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1095         "<doc>&entity;</doc>";
1096 
1097   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1098                  "Parser did not report undefined entity (standalone).");
1099 }
1100 END_TEST
1101 
1102 /* Test that an error is reported for unknown entities if we have read
1103    an external subset, and standalone is true.
1104 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1105 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1106   const char *text
1107       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1108         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1109         "<doc>&entity;</doc>";
1110   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1111 
1112   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1113   XML_SetUserData(g_parser, &test_data);
1114   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1115   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1116                  "Parser did not report undefined entity (external DTD).");
1117 }
1118 END_TEST
1119 
1120 /* Test that external entity handling is not done if the parsing flag
1121  * is set to UNLESS_STANDALONE
1122  */
START_TEST(test_entity_with_external_subset_unless_standalone)1123 START_TEST(test_entity_with_external_subset_unless_standalone) {
1124   const char *text
1125       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1126         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1127         "<doc>&entity;</doc>";
1128   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1129 
1130   XML_SetParamEntityParsing(g_parser,
1131                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1132   XML_SetUserData(g_parser, &test_data);
1133   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1134   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1135                  "Parser did not report undefined entity");
1136 }
1137 END_TEST
1138 
1139 /* Test that no error is reported for unknown entities if we have read
1140    an external subset, and standalone is false.
1141 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1142 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1143   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1144                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1145                      "<doc>&entity;</doc>";
1146   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1147 
1148   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1149   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1150   run_ext_character_check(text, &test_data, XCS(""));
1151 }
1152 END_TEST
1153 
1154 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1155 START_TEST(test_not_standalone_handler_reject) {
1156   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1157                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1158                      "<doc>&entity;</doc>";
1159   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1160 
1161   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1162   XML_SetUserData(g_parser, &test_data);
1163   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1164   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1165   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1166                  "NotStandalone handler failed to reject");
1167 
1168   /* Try again but without external entity handling */
1169   XML_ParserReset(g_parser, NULL);
1170   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1171   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1172                  "NotStandalone handler failed to reject");
1173 }
1174 END_TEST
1175 
1176 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1177 START_TEST(test_not_standalone_handler_accept) {
1178   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1179                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1180                      "<doc>&entity;</doc>";
1181   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1182 
1183   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1184   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1185   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1186   run_ext_character_check(text, &test_data, XCS(""));
1187 
1188   /* Repeat without the external entity handler */
1189   XML_ParserReset(g_parser, NULL);
1190   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1191   run_character_check(text, XCS(""));
1192 }
1193 END_TEST
1194 
START_TEST(test_entity_start_tag_level_greater_than_one)1195 START_TEST(test_entity_start_tag_level_greater_than_one) {
1196   const char *const text = "<!DOCTYPE t1 [\n"
1197                            "  <!ENTITY e1 'hello'>\n"
1198                            "]>\n"
1199                            "<t1>\n"
1200                            "  <t2>&e1;</t2>\n"
1201                            "</t1>\n";
1202 
1203   XML_Parser parser = XML_ParserCreate(NULL);
1204   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
1205                                       /*isFinal*/ XML_TRUE)
1206               == XML_STATUS_OK);
1207   XML_ParserFree(parser);
1208 }
1209 END_TEST
1210 
START_TEST(test_wfc_no_recursive_entity_refs)1211 START_TEST(test_wfc_no_recursive_entity_refs) {
1212   const char *text = "<!DOCTYPE doc [\n"
1213                      "  <!ENTITY entity '&#38;entity;'>\n"
1214                      "]>\n"
1215                      "<doc>&entity;</doc>";
1216 
1217   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1218                  "Parser did not report recursive entity reference.");
1219 }
1220 END_TEST
1221 
START_TEST(test_no_indirectly_recursive_entity_refs)1222 START_TEST(test_no_indirectly_recursive_entity_refs) {
1223   struct TestCase {
1224     const char *doc;
1225     bool usesParameterEntities;
1226   };
1227 
1228   const struct TestCase cases[] = {
1229       // general entity + character data
1230       {"<!DOCTYPE a [\n"
1231        "  <!ENTITY e1 '&e2;'>\n"
1232        "  <!ENTITY e2 '&e1;'>\n"
1233        "]><a>&e2;</a>\n",
1234        false},
1235 
1236       // general entity + attribute value
1237       {"<!DOCTYPE a [\n"
1238        "  <!ENTITY e1 '&e2;'>\n"
1239        "  <!ENTITY e2 '&e1;'>\n"
1240        "]><a k1='&e2;' />\n",
1241        false},
1242 
1243       // parameter entity
1244       {"<!DOCTYPE doc [\n"
1245        "  <!ENTITY % p1 '&#37;p2;'>\n"
1246        "  <!ENTITY % p2 '&#37;p1;'>\n"
1247        "  <!ENTITY % define_g \"<!ENTITY g '&#37;p2;'>\">\n"
1248        "  %define_g;\n"
1249        "]>\n"
1250        "<doc/>\n",
1251        true},
1252   };
1253   const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
1254 
1255   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1256     for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
1257          j++) {
1258       const XML_Bool reset_wanted = reset_or_not[j];
1259       const char *const doc = cases[i].doc;
1260       const bool usesParameterEntities = cases[i].usesParameterEntities;
1261 
1262       set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
1263 
1264 #ifdef XML_DTD // both GE and DTD
1265       const bool rejection_expected = true;
1266 #elif XML_GE == 1 // GE but not DTD
1267       const bool rejection_expected = ! usesParameterEntities;
1268 #else             // neither DTD nor GE
1269       const bool rejection_expected = false;
1270 #endif
1271 
1272       XML_Parser parser = XML_ParserCreate(NULL);
1273 
1274 #ifdef XML_DTD
1275       if (usesParameterEntities) {
1276         assert_true(
1277             XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
1278             == 1);
1279       }
1280 #else
1281       UNUSED_P(usesParameterEntities);
1282 #endif // XML_DTD
1283 
1284       const enum XML_Status status
1285           = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
1286                                     /*isFinal*/ XML_TRUE);
1287 
1288       if (rejection_expected) {
1289         assert_true(status == XML_STATUS_ERROR);
1290         assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
1291       } else {
1292         assert_true(status == XML_STATUS_OK);
1293       }
1294 
1295       if (reset_wanted) {
1296         // This covers free'ing of (eventually) all three open entity lists by
1297         // XML_ParserReset.
1298         XML_ParserReset(parser, NULL);
1299       }
1300 
1301       // This covers free'ing of (eventually) all three open entity lists by
1302       // XML_ParserFree (unless XML_ParserReset has already done that above).
1303       XML_ParserFree(parser);
1304     }
1305   }
1306 }
1307 END_TEST
1308 
START_TEST(test_recursive_external_parameter_entity_2)1309 START_TEST(test_recursive_external_parameter_entity_2) {
1310   struct TestCase {
1311     const char *doc;
1312     enum XML_Status expectedStatus;
1313   };
1314 
1315   struct TestCase cases[] = {
1316       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1317       {"<!ENTITY % p1 '%p1;'>"
1318        "<!ENTITY % p1 'first declaration wins'>",
1319        XML_STATUS_ERROR},
1320       {"<!ENTITY % p1 'first declaration wins'>"
1321        "<!ENTITY % p1 '%p1;'>",
1322        XML_STATUS_OK},
1323       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1324   };
1325 
1326   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1327     const char *const doc = cases[i].doc;
1328     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1329     set_subtest("%s", doc);
1330 
1331     XML_Parser parser = XML_ParserCreate(NULL);
1332     assert_true(parser != NULL);
1333 
1334     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1335     assert_true(ext_parser != NULL);
1336 
1337     const enum XML_Status actualStatus
1338         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1339 
1340     assert_true(actualStatus == expectedStatus);
1341     if (actualStatus != XML_STATUS_OK) {
1342       assert_true(XML_GetErrorCode(ext_parser)
1343                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1344     }
1345 
1346     XML_ParserFree(ext_parser);
1347     XML_ParserFree(parser);
1348   }
1349 }
1350 END_TEST
1351 
1352 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1353 START_TEST(test_ext_entity_invalid_parse) {
1354   const char *text = "<!DOCTYPE doc [\n"
1355                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1356                      "]>\n"
1357                      "<doc>&en;</doc>";
1358   const ExtFaults faults[]
1359       = {{"<", "Incomplete element declaration not faulted", NULL,
1360           XML_ERROR_UNCLOSED_TOKEN},
1361          {"<\xe2\x82", /* First two bytes of a three-byte char */
1362           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1363          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1364           XML_ERROR_PARTIAL_CHAR},
1365          {NULL, NULL, NULL, XML_ERROR_NONE}};
1366   const ExtFaults *fault = faults;
1367 
1368   for (; fault->parse_text != NULL; fault++) {
1369     set_subtest("\"%s\"", fault->parse_text);
1370     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1371     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1372     XML_SetUserData(g_parser, (void *)fault);
1373     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1374                    "Parser did not report external entity error");
1375     XML_ParserReset(g_parser, NULL);
1376   }
1377 }
1378 END_TEST
1379 
1380 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1381 START_TEST(test_dtd_default_handling) {
1382   const char *text = "<!DOCTYPE doc [\n"
1383                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1384                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1385                      "<!ELEMENT doc EMPTY>\n"
1386                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1387                      "<?pi in dtd?>\n"
1388                      "<!--comment in dtd-->\n"
1389                      "]><doc/>";
1390 
1391   XML_SetDefaultHandler(g_parser, accumulate_characters);
1392   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1393   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1394   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1395   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1396   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1397   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1398   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1399   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1400   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1401   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1402   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1403 }
1404 END_TEST
1405 
1406 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1407 START_TEST(test_dtd_attr_handling) {
1408   const char *prolog = "<!DOCTYPE doc [\n"
1409                        "<!ELEMENT doc EMPTY>\n";
1410   AttTest attr_data[]
1411       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1412           "]>"
1413           "<doc a='two'/>",
1414           XCS("doc"), XCS("a"),
1415           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1416           NULL, XML_TRUE},
1417          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1418           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1419           "]>"
1420           "<doc/>",
1421           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1422          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1423           "]>"
1424           "<doc/>",
1425           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1426          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1427           "]>"
1428           "<doc/>",
1429           XCS("doc"), XCS("a"), XCS("CDATA"),
1430 #ifdef XML_UNICODE
1431           XCS("\x06f2"),
1432 #else
1433           XCS("\xdb\xb2"),
1434 #endif
1435           XML_FALSE},
1436          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1437   AttTest *test;
1438 
1439   for (test = attr_data; test->definition != NULL; test++) {
1440     set_subtest("%s", test->definition);
1441     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1442     XML_SetUserData(g_parser, test);
1443     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1444                                 XML_FALSE)
1445         == XML_STATUS_ERROR)
1446       xml_failure(g_parser);
1447     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1448                                 (int)strlen(test->definition), XML_TRUE)
1449         == XML_STATUS_ERROR)
1450       xml_failure(g_parser);
1451     XML_ParserReset(g_parser, NULL);
1452   }
1453 }
1454 END_TEST
1455 
1456 /* See related SF bug #673791.
1457    When namespace processing is enabled, setting the namespace URI for
1458    a prefix is not allowed; this test ensures that it *is* allowed
1459    when namespace processing is not enabled.
1460    (See Namespaces in XML, section 2.)
1461 */
START_TEST(test_empty_ns_without_namespaces)1462 START_TEST(test_empty_ns_without_namespaces) {
1463   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1464                      "  <e xmlns:prefix=''/>\n"
1465                      "</doc>";
1466 
1467   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1468       == XML_STATUS_ERROR)
1469     xml_failure(g_parser);
1470 }
1471 END_TEST
1472 
1473 /* Regression test for SF bug #824420.
1474    Checks that an xmlns:prefix attribute set in an attribute's default
1475    value isn't misinterpreted.
1476 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1477 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1478   const char *text = "<!DOCTYPE e:element [\n"
1479                      "  <!ATTLIST e:element\n"
1480                      "    xmlns:e CDATA 'http://example.org/'>\n"
1481                      "      ]>\n"
1482                      "<e:element/>";
1483 
1484   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1485       == XML_STATUS_ERROR)
1486     xml_failure(g_parser);
1487 }
1488 END_TEST
1489 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1492 START_TEST(test_stop_parser_between_char_data_calls) {
1493   /* The sample data must be big enough that there are two calls to
1494      the character data handler from within the inner "for" loop of
1495      the XML_TOK_DATA_CHARS case in doContent(), and the character
1496      handler must stop the parser and clear the character data
1497      handler.
1498   */
1499   const char *text = long_character_data_text;
1500 
1501   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1502   g_resumable = XML_FALSE;
1503   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1504       != XML_STATUS_ERROR)
1505     xml_failure(g_parser);
1506   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1507     xml_failure(g_parser);
1508 }
1509 END_TEST
1510 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1513 START_TEST(test_suspend_parser_between_char_data_calls) {
1514   /* The sample data must be big enough that there are two calls to
1515      the character data handler from within the inner "for" loop of
1516      the XML_TOK_DATA_CHARS case in doContent(), and the character
1517      handler must stop the parser and clear the character data
1518      handler.
1519   */
1520   const char *text = long_character_data_text;
1521 
1522   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1523   g_resumable = XML_TRUE;
1524   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1525   // we won't know exactly how much input we actually managed to give Expat.
1526   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1527       != XML_STATUS_SUSPENDED)
1528     xml_failure(g_parser);
1529   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1530     xml_failure(g_parser);
1531   /* Try parsing directly */
1532   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1533       != XML_STATUS_ERROR)
1534     fail("Attempt to continue parse while suspended not faulted");
1535   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1536     fail("Suspended parse not faulted with correct error");
1537 }
1538 END_TEST
1539 
1540 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1541 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1542   const char *text = long_character_data_text;
1543 
1544   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1545   g_resumable = XML_FALSE;
1546   g_abortable = XML_FALSE;
1547   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1548       != XML_STATUS_ERROR)
1549     fail("Failed to double-stop parser");
1550 
1551   XML_ParserReset(g_parser, NULL);
1552   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1553   g_resumable = XML_TRUE;
1554   g_abortable = XML_FALSE;
1555   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1556   // we won't know exactly how much input we actually managed to give Expat.
1557   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1558       != XML_STATUS_SUSPENDED)
1559     fail("Failed to double-suspend parser");
1560 
1561   XML_ParserReset(g_parser, NULL);
1562   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1563   g_resumable = XML_TRUE;
1564   g_abortable = XML_TRUE;
1565   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1566       != XML_STATUS_ERROR)
1567     fail("Failed to suspend-abort parser");
1568 }
1569 END_TEST
1570 
START_TEST(test_good_cdata_ascii)1571 START_TEST(test_good_cdata_ascii) {
1572   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1573   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1574 
1575   CharData storage;
1576   CharData_Init(&storage);
1577   XML_SetUserData(g_parser, &storage);
1578   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1579   /* Add start and end handlers for coverage */
1580   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1581   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1582 
1583   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1584       == XML_STATUS_ERROR)
1585     xml_failure(g_parser);
1586   CharData_CheckXMLChars(&storage, expected);
1587 
1588   /* Try again, this time with a default handler */
1589   XML_ParserReset(g_parser, NULL);
1590   CharData_Init(&storage);
1591   XML_SetUserData(g_parser, &storage);
1592   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1593   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1594 
1595   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1596       == XML_STATUS_ERROR)
1597     xml_failure(g_parser);
1598   CharData_CheckXMLChars(&storage, expected);
1599 }
1600 END_TEST
1601 
START_TEST(test_good_cdata_utf16)1602 START_TEST(test_good_cdata_utf16) {
1603   /* Test data is:
1604    *   <?xml version='1.0' encoding='utf-16'?>
1605    *   <a><![CDATA[hello]]></a>
1606    */
1607   const char text[]
1608       = "\0<\0?\0x\0m\0l\0"
1609         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1610         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1611         "1\0"
1612         "6\0'"
1613         "\0?\0>\0\n"
1614         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1615   const XML_Char *expected = XCS("hello");
1616 
1617   CharData storage;
1618   CharData_Init(&storage);
1619   XML_SetUserData(g_parser, &storage);
1620   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1621 
1622   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1623       == XML_STATUS_ERROR)
1624     xml_failure(g_parser);
1625   CharData_CheckXMLChars(&storage, expected);
1626 }
1627 END_TEST
1628 
START_TEST(test_good_cdata_utf16_le)1629 START_TEST(test_good_cdata_utf16_le) {
1630   /* Test data is:
1631    *   <?xml version='1.0' encoding='utf-16'?>
1632    *   <a><![CDATA[hello]]></a>
1633    */
1634   const char text[]
1635       = "<\0?\0x\0m\0l\0"
1636         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1637         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1638         "1\0"
1639         "6\0'"
1640         "\0?\0>\0\n"
1641         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1642   const XML_Char *expected = XCS("hello");
1643 
1644   CharData storage;
1645   CharData_Init(&storage);
1646   XML_SetUserData(g_parser, &storage);
1647   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1648 
1649   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1650       == XML_STATUS_ERROR)
1651     xml_failure(g_parser);
1652   CharData_CheckXMLChars(&storage, expected);
1653 }
1654 END_TEST
1655 
1656 /* Test UTF16 conversion of a long cdata string */
1657 
1658 /* 16 characters: handy macro to reduce visual clutter */
1659 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1660 
START_TEST(test_long_cdata_utf16)1661 START_TEST(test_long_cdata_utf16) {
1662   /* Test data is:
1663    * <?xlm version='1.0' encoding='utf-16'?>
1664    * <a><![CDATA[
1665    * ABCDEFGHIJKLMNOP
1666    * ]]></a>
1667    */
1668   const char text[]
1669       = "\0<\0?\0x\0m\0l\0 "
1670         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1671         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1672         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1673       /* 64 characters per line */
1674       /* clang-format off */
1675         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1676         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1677         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1678         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1679         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1680         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1681         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1682         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1683         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1684         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1685         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1686         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1687         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1688         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1689         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1690         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1691         A_TO_P_IN_UTF16
1692         /* clang-format on */
1693         "\0]\0]\0>\0<\0/\0a\0>";
1694   const XML_Char *expected =
1695       /* clang-format off */
1696         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1697         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1698         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1699         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1700         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1701         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1702         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1703         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1704         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1705         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1706         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1707         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1708         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1709         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1710         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1711         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1712         XCS("ABCDEFGHIJKLMNOP");
1713   /* clang-format on */
1714   CharData storage;
1715   void *buffer;
1716 
1717   CharData_Init(&storage);
1718   XML_SetUserData(g_parser, &storage);
1719   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1720   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1721   if (buffer == NULL)
1722     fail("Could not allocate parse buffer");
1723   assert(buffer != NULL);
1724   memcpy(buffer, text, sizeof(text) - 1);
1725   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1726     xml_failure(g_parser);
1727   CharData_CheckXMLChars(&storage, expected);
1728 }
1729 END_TEST
1730 
1731 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1732 START_TEST(test_multichar_cdata_utf16) {
1733   /* Test data is:
1734    *   <?xml version='1.0' encoding='utf-16'?>
1735    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1736    *
1737    * where {MINIM} is U+1d15e (a minim or half-note)
1738    *   UTF-16: 0xd834 0xdd5e
1739    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1740    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1741    *   UTF-16: 0xd834 0xdd5f
1742    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1743    */
1744   const char text[] = "\0<\0?\0x\0m\0l\0"
1745                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1746                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1747                       "1\0"
1748                       "6\0'"
1749                       "\0?\0>\0\n"
1750                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1751                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1752                       "\0]\0]\0>\0<\0/\0a\0>";
1753 #ifdef XML_UNICODE
1754   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1755 #else
1756   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1757 #endif
1758   CharData storage;
1759 
1760   CharData_Init(&storage);
1761   XML_SetUserData(g_parser, &storage);
1762   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1763 
1764   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1765       == XML_STATUS_ERROR)
1766     xml_failure(g_parser);
1767   CharData_CheckXMLChars(&storage, expected);
1768 }
1769 END_TEST
1770 
/* Test that a reversed UTF-16 surrogate pair in a CDATA section is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1772 START_TEST(test_utf16_bad_surrogate_pair) {
1773   /* Test data is:
1774    *   <?xml version='1.0' encoding='utf-16'?>
1775    *   <a><![CDATA[{BADLINB}]]></a>
1776    *
1777    * where {BADLINB} is U+10000 (the first Linear B character)
1778    * with the UTF-16 surrogate pair in the wrong order, i.e.
1779    *   0xdc00 0xd800
1780    */
1781   const char text[] = "\0<\0?\0x\0m\0l\0"
1782                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1783                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1784                       "1\0"
1785                       "6\0'"
1786                       "\0?\0>\0\n"
1787                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1788                       "\xdc\x00\xd8\x00"
1789                       "\0]\0]\0>\0<\0/\0a\0>";
1790 
1791   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1792       != XML_STATUS_ERROR)
1793     fail("Reversed UTF-16 surrogate pair not faulted");
1794   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1795     xml_failure(g_parser);
1796 }
1797 END_TEST
1798 
START_TEST(test_bad_cdata)1799 START_TEST(test_bad_cdata) {
1800   struct CaseData {
1801     const char *text;
1802     enum XML_Error expectedError;
1803   };
1804 
1805   struct CaseData cases[]
1806       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1807          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1808          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1809          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1810          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1811          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1812          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1813          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1814 
1815          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1816          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1817          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1818 
1819          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1820          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1821          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1822          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1823          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1824          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1825          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1826 
1827          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1828          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1829          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1830 
1831   size_t i = 0;
1832   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1833     set_subtest("%s", cases[i].text);
1834     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1835         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1836     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1837 
1838     assert(actualStatus == XML_STATUS_ERROR);
1839 
1840     if (actualError != cases[i].expectedError) {
1841       char message[100];
1842       snprintf(message, sizeof(message),
1843                "Expected error %d but got error %d for case %u: \"%s\"\n",
1844                cases[i].expectedError, actualError, (unsigned int)i + 1,
1845                cases[i].text);
1846       fail(message);
1847     }
1848 
1849     XML_ParserReset(g_parser, NULL);
1850   }
1851 }
1852 END_TEST
1853 
1854 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1855 START_TEST(test_bad_cdata_utf16) {
1856   struct CaseData {
1857     size_t text_bytes;
1858     const char *text;
1859     enum XML_Error expected_error;
1860   };
1861 
1862   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1863                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1864                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1865                         "1\0"
1866                         "6\0'"
1867                         "\0?\0>\0\n"
1868                         "\0<\0a\0>";
1869   struct CaseData cases[] = {
1870       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1871       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1872       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1873       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1874       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1875       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1876       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1877       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1878       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1879       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1880       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1881       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1882       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1883       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1884       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1885       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1886       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1887       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1888       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1889       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1890       /* Now add a four-byte UTF-16 character */
1891       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1892        XML_ERROR_UNCLOSED_CDATA_SECTION},
1893       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1894       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1895        XML_ERROR_PARTIAL_CHAR},
1896       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1897        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1898   size_t i;
1899 
1900   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1901     set_subtest("case %lu", (long unsigned)(i + 1));
1902     enum XML_Status actual_status;
1903     enum XML_Error actual_error;
1904 
1905     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1906                                 XML_FALSE)
1907         == XML_STATUS_ERROR)
1908       xml_failure(g_parser);
1909     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1910                                             (int)cases[i].text_bytes, XML_TRUE);
1911     assert(actual_status == XML_STATUS_ERROR);
1912     actual_error = XML_GetErrorCode(g_parser);
1913     if (actual_error != cases[i].expected_error) {
1914       char message[1024];
1915 
1916       snprintf(message, sizeof(message),
1917                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1918                ") for case %lu\n",
1919                cases[i].expected_error,
1920                XML_ErrorString(cases[i].expected_error), actual_error,
1921                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1922       fail(message);
1923     }
1924     XML_ParserReset(g_parser, NULL);
1925   }
1926 }
1927 END_TEST
1928 
1929 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1930 START_TEST(test_stop_parser_between_cdata_calls) {
1931   const char *text = long_cdata_text;
1932 
1933   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1934   g_resumable = XML_FALSE;
1935   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1936 }
1937 END_TEST
1938 
1939 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1940 START_TEST(test_suspend_parser_between_cdata_calls) {
1941   if (g_chunkSize != 0) {
1942     // this test does not use SINGLE_BYTES, because of suspension
1943     return;
1944   }
1945 
1946   const char *text = long_cdata_text;
1947   enum XML_Status result;
1948 
1949   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1950   g_resumable = XML_TRUE;
1951   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1952   // we won't know exactly how much input we actually managed to give Expat.
1953   result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
1954   if (result != XML_STATUS_SUSPENDED) {
1955     if (result == XML_STATUS_ERROR)
1956       xml_failure(g_parser);
1957     fail("Parse not suspended in CDATA handler");
1958   }
1959   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1960     xml_failure(g_parser);
1961 }
1962 END_TEST
1963 
1964 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1965 START_TEST(test_memory_allocation) {
1966   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1967   char *p;
1968 
1969   if (buffer == NULL) {
1970     fail("Allocation failed");
1971   } else {
1972     /* Try writing to memory; some OSes try to cheat! */
1973     buffer[0] = 'T';
1974     buffer[1] = 'E';
1975     buffer[2] = 'S';
1976     buffer[3] = 'T';
1977     buffer[4] = '\0';
1978     if (strcmp(buffer, "TEST") != 0) {
1979       fail("Memory not writable");
1980     } else {
1981       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1982       if (p == NULL) {
1983         fail("Reallocation failed");
1984       } else {
1985         /* Write again, just to be sure */
1986         buffer = p;
1987         buffer[0] = 'V';
1988         if (strcmp(buffer, "VEST") != 0) {
1989           fail("Reallocated memory not writable");
1990         }
1991       }
1992     }
1993     XML_MemFree(g_parser, buffer);
1994   }
1995 }
1996 END_TEST
1997 
1998 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1999 START_TEST(test_default_current) {
2000   const char *text = "<doc>hell]</doc>";
2001   const char *entity_text = "<!DOCTYPE doc [\n"
2002                             "<!ENTITY entity '&#37;'>\n"
2003                             "]>\n"
2004                             "<doc>&entity;</doc>";
2005 
2006   set_subtest("with defaulting");
2007   {
2008     struct handler_record_list storage;
2009     storage.count = 0;
2010     XML_SetDefaultHandler(g_parser, record_default_handler);
2011     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2012     XML_SetUserData(g_parser, &storage);
2013     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2014         == XML_STATUS_ERROR)
2015       xml_failure(g_parser);
2016     int i = 0;
2017     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2018     // we should have gotten one or more cdata callbacks, totaling 5 chars
2019     int cdata_len_remaining = 5;
2020     while (cdata_len_remaining > 0) {
2021       const struct handler_record_entry *c_entry
2022           = handler_record_get(&storage, i++);
2023       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
2024       assert_true(c_entry->arg > 0);
2025       assert_true(c_entry->arg <= cdata_len_remaining);
2026       cdata_len_remaining -= c_entry->arg;
2027       // default handler must follow, with the exact same len argument.
2028       assert_record_handler_called(&storage, i++, "record_default_handler",
2029                                    c_entry->arg);
2030     }
2031     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2032     assert_true(storage.count == i);
2033   }
2034 
2035   /* Again, without the defaulting */
2036   set_subtest("no defaulting");
2037   {
2038     struct handler_record_list storage;
2039     storage.count = 0;
2040     XML_ParserReset(g_parser, NULL);
2041     XML_SetDefaultHandler(g_parser, record_default_handler);
2042     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2043     XML_SetUserData(g_parser, &storage);
2044     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2045         == XML_STATUS_ERROR)
2046       xml_failure(g_parser);
2047     int i = 0;
2048     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2049     // we should have gotten one or more cdata callbacks, totaling 5 chars
2050     int cdata_len_remaining = 5;
2051     while (cdata_len_remaining > 0) {
2052       const struct handler_record_entry *c_entry
2053           = handler_record_get(&storage, i++);
2054       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
2055       assert_true(c_entry->arg > 0);
2056       assert_true(c_entry->arg <= cdata_len_remaining);
2057       cdata_len_remaining -= c_entry->arg;
2058     }
2059     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2060     assert_true(storage.count == i);
2061   }
2062 
2063   /* Now with an internal entity to complicate matters */
2064   set_subtest("with internal entity");
2065   {
2066     struct handler_record_list storage;
2067     storage.count = 0;
2068     XML_ParserReset(g_parser, NULL);
2069     XML_SetDefaultHandler(g_parser, record_default_handler);
2070     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2071     XML_SetUserData(g_parser, &storage);
2072     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2073                                 XML_TRUE)
2074         == XML_STATUS_ERROR)
2075       xml_failure(g_parser);
2076     /* The default handler suppresses the entity */
2077     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2078     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2079     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2080     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2081     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2082     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2083     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2084     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2085     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2086     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2087     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2088     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2089     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2090     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2091     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2092     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2093     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2094     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
2095     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2096     assert_true(storage.count == 19);
2097   }
2098 
2099   /* Again, with a skip handler */
2100   set_subtest("with skip handler");
2101   {
2102     struct handler_record_list storage;
2103     storage.count = 0;
2104     XML_ParserReset(g_parser, NULL);
2105     XML_SetDefaultHandler(g_parser, record_default_handler);
2106     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2107     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
2108     XML_SetUserData(g_parser, &storage);
2109     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2110                                 XML_TRUE)
2111         == XML_STATUS_ERROR)
2112       xml_failure(g_parser);
2113     /* The default handler suppresses the entity */
2114     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2115     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2116     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2117     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2118     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2119     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2120     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2121     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2122     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2123     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2124     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2125     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2126     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2127     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2128     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2129     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2130     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2131     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2132     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2133     assert_true(storage.count == 19);
2134   }
2135 
2136   /* This time, allow the entity through */
2137   set_subtest("allow entity");
2138   {
2139     struct handler_record_list storage;
2140     storage.count = 0;
2141     XML_ParserReset(g_parser, NULL);
2142     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2143     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2144     XML_SetUserData(g_parser, &storage);
2145     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2146                                 XML_TRUE)
2147         == XML_STATUS_ERROR)
2148       xml_failure(g_parser);
2149     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2150     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2151     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2152     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2153     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2154     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2155     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2156     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2157     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2158     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2159     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2160     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2161     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2162     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2163     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2164     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2165     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2166     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2167     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2168     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2169     assert_true(storage.count == 20);
2170   }
2171 
2172   /* Finally, without passing the cdata to the default handler */
2173   set_subtest("not passing cdata");
2174   {
2175     struct handler_record_list storage;
2176     storage.count = 0;
2177     XML_ParserReset(g_parser, NULL);
2178     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2179     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2180     XML_SetUserData(g_parser, &storage);
2181     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2182                                 XML_TRUE)
2183         == XML_STATUS_ERROR)
2184       xml_failure(g_parser);
2185     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2186     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2187     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2188     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2189     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2190     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2191     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2192     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2193     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2194     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2195     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2196     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2197     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2198     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2199     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2200     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2201     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2202     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2203                                  1);
2204     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2205     assert_true(storage.count == 19);
2206   }
2207 }
2208 END_TEST
2209 
2210 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2211 START_TEST(test_dtd_elements) {
2212   const char *text = "<!DOCTYPE doc [\n"
2213                      "<!ELEMENT doc (chapter)>\n"
2214                      "<!ELEMENT chapter (#PCDATA)>\n"
2215                      "]>\n"
2216                      "<doc><chapter>Wombats are go</chapter></doc>";
2217 
2218   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2219   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2220       == XML_STATUS_ERROR)
2221     xml_failure(g_parser);
2222 }
2223 END_TEST
2224 
2225 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2226 element_decl_check_model(void *userData, const XML_Char *name,
2227                          XML_Content *model) {
2228   UNUSED_P(userData);
2229   uint32_t errorFlags = 0;
2230 
2231   /* Expected model array structure is this:
2232    * [0] (type 6, quant 0)
2233    *   [1] (type 5, quant 0)
2234    *     [3] (type 4, quant 0, name "bar")
2235    *     [4] (type 4, quant 0, name "foo")
2236    *     [5] (type 4, quant 3, name "xyz")
2237    *   [2] (type 4, quant 2, name "zebra")
2238    */
2239   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2240   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2241 
2242   if (model != NULL) {
2243     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2244     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2245     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2246     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2247     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2248 
2249     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2250     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2251     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2252     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2253     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2254 
2255     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2256     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2257     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2258     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2259     errorFlags
2260         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2261 
2262     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2263     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2264     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2265     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2266     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2267 
2268     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2269     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2270     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2271     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2272     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2273 
2274     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2275     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2276     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2277     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2278     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2279   }
2280 
2281   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2282   XML_FreeContentModel(g_parser, model);
2283 }
2284 
START_TEST(test_dtd_elements_nesting)2285 START_TEST(test_dtd_elements_nesting) {
2286   // Payload inspired by a test in Perl's XML::Parser
2287   const char *text = "<!DOCTYPE foo [\n"
2288                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2289                      "]>\n"
2290                      "<foo/>";
2291 
2292   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2293 
2294   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2295   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2296       == XML_STATUS_ERROR)
2297     xml_failure(g_parser);
2298 
2299   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2300     fail("Element declaration model regression detected");
2301 }
2302 END_TEST
2303 
2304 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2305 START_TEST(test_set_foreign_dtd) {
2306   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2307   const char *text2 = "<doc>&entity;</doc>";
2308   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2309 
2310   /* Check hash salt is passed through too */
2311   XML_SetHashSalt(g_parser, 0x12345678);
2312   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2313   XML_SetUserData(g_parser, &test_data);
2314   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2315   /* Add a default handler to exercise more code paths */
2316   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2317   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2318     fail("Could not set foreign DTD");
2319   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2320       == XML_STATUS_ERROR)
2321     xml_failure(g_parser);
2322 
2323   /* Ensure that trying to set the DTD after parsing has started
2324    * is faulted, even if it's the same setting.
2325    */
2326   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2327       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2328     fail("Failed to reject late foreign DTD setting");
2329   /* Ditto for the hash salt */
2330   if (XML_SetHashSalt(g_parser, 0x23456789))
2331     fail("Failed to reject late hash salt change");
2332 
2333   /* Now finish the parse */
2334   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2335       == XML_STATUS_ERROR)
2336     xml_failure(g_parser);
2337 }
2338 END_TEST
2339 
2340 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2341 START_TEST(test_foreign_dtd_not_standalone) {
2342   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2343                      "<doc>&entity;</doc>";
2344   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2345 
2346   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2347   XML_SetUserData(g_parser, &test_data);
2348   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2349   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2350   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2351     fail("Could not set foreign DTD");
2352   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2353                  "NotStandalonehandler failed to reject");
2354 }
2355 END_TEST
2356 
2357 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2358 START_TEST(test_invalid_foreign_dtd) {
2359   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2360                      "<doc>&entity;</doc>";
2361   ExtFaults test_data
2362       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2363 
2364   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2365   XML_SetUserData(g_parser, &test_data);
2366   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2367   XML_UseForeignDTD(g_parser, XML_TRUE);
2368   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2369                  "Bad DTD should not have been accepted");
2370 }
2371 END_TEST
2372 
2373 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2374 START_TEST(test_foreign_dtd_with_doctype) {
2375   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2376                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2377   const char *text2 = "<doc>&entity;</doc>";
2378   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2379 
2380   /* Check hash salt is passed through too */
2381   XML_SetHashSalt(g_parser, 0x12345678);
2382   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2383   XML_SetUserData(g_parser, &test_data);
2384   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2385   /* Add a default handler to exercise more code paths */
2386   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2387   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2388     fail("Could not set foreign DTD");
2389   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2390       == XML_STATUS_ERROR)
2391     xml_failure(g_parser);
2392 
2393   /* Ensure that trying to set the DTD after parsing has started
2394    * is faulted, even if it's the same setting.
2395    */
2396   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2397       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2398     fail("Failed to reject late foreign DTD setting");
2399   /* Ditto for the hash salt */
2400   if (XML_SetHashSalt(g_parser, 0x23456789))
2401     fail("Failed to reject late hash salt change");
2402 
2403   /* Now finish the parse */
2404   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2405       == XML_STATUS_ERROR)
2406     xml_failure(g_parser);
2407 }
2408 END_TEST
2409 
2410 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2411 START_TEST(test_foreign_dtd_without_external_subset) {
2412   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2413                      "<doc>&foo;</doc>";
2414 
2415   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2416   XML_SetUserData(g_parser, NULL);
2417   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2418   XML_UseForeignDTD(g_parser, XML_TRUE);
2419   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2420       == XML_STATUS_ERROR)
2421     xml_failure(g_parser);
2422 }
2423 END_TEST
2424 
START_TEST(test_empty_foreign_dtd)2425 START_TEST(test_empty_foreign_dtd) {
2426   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2427                      "<doc>&entity;</doc>";
2428 
2429   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2430   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2431   XML_UseForeignDTD(g_parser, XML_TRUE);
2432   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2433                  "Undefined entity not faulted");
2434 }
2435 END_TEST
2436 
2437 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2438 START_TEST(test_set_base) {
2439   const XML_Char *old_base;
2440   const XML_Char *new_base = XCS("/local/file/name.xml");
2441 
2442   old_base = XML_GetBase(g_parser);
2443   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2444     fail("Unable to set base");
2445   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2446     fail("Base setting not correct");
2447   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2448     fail("Unable to NULL base");
2449   if (XML_GetBase(g_parser) != NULL)
2450     fail("Base setting not nulled");
2451   XML_SetBase(g_parser, old_base);
2452 }
2453 END_TEST
2454 
2455 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2456 START_TEST(test_attributes) {
2457   const char *text = "<!DOCTYPE doc [\n"
2458                      "<!ELEMENT doc (tag)>\n"
2459                      "<!ATTLIST doc id ID #REQUIRED>\n"
2460                      "]>"
2461                      "<doc a='1' id='one' b='2'>"
2462                      "<tag c='3'/>"
2463                      "</doc>";
2464   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2465                          {XCS("b"), XCS("2")},
2466                          {XCS("id"), XCS("one")},
2467                          {NULL, NULL}};
2468   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2469   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2470                         {XCS("tag"), 1, NULL, NULL},
2471                         {NULL, 0, NULL, NULL}};
2472   info[0].attributes = doc_info;
2473   info[1].attributes = tag_info;
2474 
2475   XML_Parser parser = XML_ParserCreate(NULL);
2476   assert_true(parser != NULL);
2477   ParserAndElementInfo parserAndElementInfos = {
2478       parser,
2479       info,
2480   };
2481 
2482   XML_SetStartElementHandler(parser, counting_start_element_handler);
2483   XML_SetUserData(parser, &parserAndElementInfos);
2484   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2485       == XML_STATUS_ERROR)
2486     xml_failure(parser);
2487 
2488   XML_ParserFree(parser);
2489 }
2490 END_TEST
2491 
2492 /* Test reset works correctly in the middle of processing an internal
2493  * entity.  Exercises some obscure code in XML_ParserReset().
2494  */
START_TEST(test_reset_in_entity)2495 START_TEST(test_reset_in_entity) {
2496   if (g_chunkSize != 0) {
2497     // this test does not use SINGLE_BYTES, because of suspension
2498     return;
2499   }
2500 
2501   const char *text = "<!DOCTYPE doc [\n"
2502                      "<!ENTITY wombat 'wom'>\n"
2503                      "<!ENTITY entity 'hi &wom; there'>\n"
2504                      "]>\n"
2505                      "<doc>&entity;</doc>";
2506   XML_ParsingStatus status;
2507 
2508   g_resumable = XML_TRUE;
2509   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2510   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
2511   // we won't know exactly how much input we actually managed to give Expat.
2512   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2513       == XML_STATUS_ERROR)
2514     xml_failure(g_parser);
2515   XML_GetParsingStatus(g_parser, &status);
2516   if (status.parsing != XML_SUSPENDED)
2517     fail("Parsing status not SUSPENDED");
2518   XML_ParserReset(g_parser, NULL);
2519   XML_GetParsingStatus(g_parser, &status);
2520   if (status.parsing != XML_INITIALIZED)
2521     fail("Parsing status doesn't reset to INITIALIZED");
2522 }
2523 END_TEST
2524 
2525 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2526 START_TEST(test_resume_invalid_parse) {
2527   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2528 
2529   g_resumable = XML_TRUE;
2530   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2531   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2532       == XML_STATUS_ERROR)
2533     xml_failure(g_parser);
2534   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2535     fail("Resumed invalid parse not faulted");
2536   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2537     fail("Invalid parse not correctly faulted");
2538 }
2539 END_TEST
2540 
2541 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2542 START_TEST(test_resume_resuspended) {
2543   const char *text = "<doc>Hello<meep/>world</doc>";
2544 
2545   g_resumable = XML_TRUE;
2546   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2547   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2548       == XML_STATUS_ERROR)
2549     xml_failure(g_parser);
2550   g_resumable = XML_TRUE;
2551   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2552   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2553     fail("Resumption not suspended");
2554   /* This one should succeed and finish up */
2555   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2556     xml_failure(g_parser);
2557 }
2558 END_TEST
2559 
2560 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2561 START_TEST(test_cdata_default) {
2562   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2563   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2564   CharData storage;
2565 
2566   CharData_Init(&storage);
2567   XML_SetUserData(g_parser, &storage);
2568   XML_SetDefaultHandler(g_parser, accumulate_characters);
2569 
2570   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2571       == XML_STATUS_ERROR)
2572     xml_failure(g_parser);
2573   CharData_CheckXMLChars(&storage, expected);
2574 }
2575 END_TEST
2576 
2577 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2578 START_TEST(test_subordinate_reset) {
2579   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2580                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2581                      "<doc>&entity;</doc>";
2582 
2583   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2584   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2585   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2586       == XML_STATUS_ERROR)
2587     xml_failure(g_parser);
2588 }
2589 END_TEST
2590 
2591 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2592 START_TEST(test_subordinate_suspend) {
2593   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2594                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2595                      "<doc>&entity;</doc>";
2596 
2597   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2598   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2599   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2600       == XML_STATUS_ERROR)
2601     xml_failure(g_parser);
2602 }
2603 END_TEST
2604 
2605 /* Test suspending a subordinate parser from an XML declaration */
2606 /* Increases code coverage of the tests */
2607 
START_TEST(test_subordinate_xdecl_suspend)2608 START_TEST(test_subordinate_xdecl_suspend) {
2609   const char *text
2610       = "<!DOCTYPE doc [\n"
2611         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2612         "]>\n"
2613         "<doc>&entity;</doc>";
2614 
2615   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2616   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2617   g_resumable = XML_TRUE;
2618   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2619       == XML_STATUS_ERROR)
2620     xml_failure(g_parser);
2621 }
2622 END_TEST
2623 
START_TEST(test_subordinate_xdecl_abort)2624 START_TEST(test_subordinate_xdecl_abort) {
2625   const char *text
2626       = "<!DOCTYPE doc [\n"
2627         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2628         "]>\n"
2629         "<doc>&entity;</doc>";
2630 
2631   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2632   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2633   g_resumable = XML_FALSE;
2634   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2635       == XML_STATUS_ERROR)
2636     xml_failure(g_parser);
2637 }
2638 END_TEST
2639 
2640 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2641 START_TEST(test_ext_entity_invalid_suspended_parse) {
2642   const char *text = "<!DOCTYPE doc [\n"
2643                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2644                      "]>\n"
2645                      "<doc>&en;</doc>";
2646   ExtFaults faults[]
2647       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2648           "Incomplete element declaration not faulted", NULL,
2649           XML_ERROR_UNCLOSED_TOKEN},
2650          {/* First two bytes of a three-byte char */
2651           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2652           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2653          {NULL, NULL, NULL, XML_ERROR_NONE}};
2654   ExtFaults *fault;
2655 
2656   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2657     set_subtest("%s", fault->parse_text);
2658     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2659     XML_SetExternalEntityRefHandler(g_parser,
2660                                     external_entity_suspending_faulter);
2661     XML_SetUserData(g_parser, fault);
2662     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2663                    "Parser did not report external entity error");
2664     XML_ParserReset(g_parser, NULL);
2665   }
2666 }
2667 END_TEST
2668 
2669 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2670 START_TEST(test_explicit_encoding) {
2671   const char *text1 = "<doc>Hello ";
2672   const char *text2 = " World</doc>";
2673 
2674   /* Just check that we can set the encoding to NULL before starting */
2675   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2676     fail("Failed to initialise encoding to NULL");
2677   /* Say we are UTF-8 */
2678   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2679     fail("Failed to set explicit encoding");
2680   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2681       == XML_STATUS_ERROR)
2682     xml_failure(g_parser);
2683   /* Try to switch encodings mid-parse */
2684   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2685     fail("Allowed encoding change");
2686   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2687       == XML_STATUS_ERROR)
2688     xml_failure(g_parser);
2689   /* Try now the parse is over */
2690   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2691     fail("Failed to unset encoding");
2692 }
2693 END_TEST
2694 
2695 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2696 START_TEST(test_trailing_cr) {
2697   const char *text = "<doc>\r";
2698   int found_cr;
2699 
2700   /* Try with a character handler, for code coverage */
2701   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2702   XML_SetUserData(g_parser, &found_cr);
2703   found_cr = 0;
2704   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2705       == XML_STATUS_OK)
2706     fail("Failed to fault unclosed doc");
2707   if (found_cr == 0)
2708     fail("Did not catch the carriage return");
2709   XML_ParserReset(g_parser, NULL);
2710 
2711   /* Now with a default handler instead */
2712   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2713   XML_SetUserData(g_parser, &found_cr);
2714   found_cr = 0;
2715   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2716       == XML_STATUS_OK)
2717     fail("Failed to fault unclosed doc");
2718   if (found_cr == 0)
2719     fail("Did not catch default carriage return");
2720 }
2721 END_TEST
2722 
2723 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2724 START_TEST(test_ext_entity_trailing_cr) {
2725   const char *text = "<!DOCTYPE doc [\n"
2726                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2727                      "]>\n"
2728                      "<doc>&en;</doc>";
2729   int found_cr;
2730 
2731   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2732   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2733   XML_SetUserData(g_parser, &found_cr);
2734   found_cr = 0;
2735   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2736       != XML_STATUS_OK)
2737     xml_failure(g_parser);
2738   if (found_cr == 0)
2739     fail("No carriage return found");
2740   XML_ParserReset(g_parser, NULL);
2741 
2742   /* Try again with a different trailing CR */
2743   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2744   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2745   XML_SetUserData(g_parser, &found_cr);
2746   found_cr = 0;
2747   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2748       != XML_STATUS_OK)
2749     xml_failure(g_parser);
2750   if (found_cr == 0)
2751     fail("No carriage return found");
2752 }
2753 END_TEST
2754 
2755 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2756 START_TEST(test_trailing_rsqb) {
2757   const char *text8 = "<doc>]";
2758   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2759   int found_rsqb;
2760   int text8_len = (int)strlen(text8);
2761 
2762   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2763   XML_SetUserData(g_parser, &found_rsqb);
2764   found_rsqb = 0;
2765   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2766       == XML_STATUS_OK)
2767     fail("Failed to fault unclosed doc");
2768   if (found_rsqb == 0)
2769     fail("Did not catch the right square bracket");
2770 
2771   /* Try again with a different encoding */
2772   XML_ParserReset(g_parser, NULL);
2773   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2774   XML_SetUserData(g_parser, &found_rsqb);
2775   found_rsqb = 0;
2776   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2777                               XML_TRUE)
2778       == XML_STATUS_OK)
2779     fail("Failed to fault unclosed doc");
2780   if (found_rsqb == 0)
2781     fail("Did not catch the right square bracket");
2782 
2783   /* And finally with a default handler */
2784   XML_ParserReset(g_parser, NULL);
2785   XML_SetDefaultHandler(g_parser, rsqb_handler);
2786   XML_SetUserData(g_parser, &found_rsqb);
2787   found_rsqb = 0;
2788   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2789                               XML_TRUE)
2790       == XML_STATUS_OK)
2791     fail("Failed to fault unclosed doc");
2792   if (found_rsqb == 0)
2793     fail("Did not catch the right square bracket");
2794 }
2795 END_TEST
2796 
2797 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2798 START_TEST(test_ext_entity_trailing_rsqb) {
2799   const char *text = "<!DOCTYPE doc [\n"
2800                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2801                      "]>\n"
2802                      "<doc>&en;</doc>";
2803   int found_rsqb;
2804 
2805   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2806   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2807   XML_SetUserData(g_parser, &found_rsqb);
2808   found_rsqb = 0;
2809   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2810       != XML_STATUS_OK)
2811     xml_failure(g_parser);
2812   if (found_rsqb == 0)
2813     fail("No right square bracket found");
2814 }
2815 END_TEST
2816 
2817 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2818 START_TEST(test_ext_entity_good_cdata) {
2819   const char *text = "<!DOCTYPE doc [\n"
2820                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2821                      "]>\n"
2822                      "<doc>&en;</doc>";
2823 
2824   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2825   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2826   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2827       != XML_STATUS_OK)
2828     xml_failure(g_parser);
2829 }
2830 END_TEST
2831 
2832 /* Test user parameter settings */
START_TEST(test_user_parameters)2833 START_TEST(test_user_parameters) {
2834   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2835                      "<!-- Primary parse -->\n"
2836                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2837                      "<doc>&entity;";
2838   const char *epilog = "<!-- Back to primary parser -->\n"
2839                        "</doc>";
2840 
2841   g_comment_count = 0;
2842   g_skip_count = 0;
2843   g_xdecl_count = 0;
2844   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2845   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2846   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2847   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2848   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2849   XML_UseParserAsHandlerArg(g_parser);
2850   XML_SetUserData(g_parser, (void *)1);
2851   g_handler_data = g_parser;
2852   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2853       == XML_STATUS_ERROR)
2854     xml_failure(g_parser);
2855   /* Ensure we can't change policy mid-parse */
2856   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2857     fail("Changed param entity parsing policy while parsing");
2858   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2859       == XML_STATUS_ERROR)
2860     xml_failure(g_parser);
2861   if (g_comment_count != 3)
2862     fail("Comment handler not invoked enough times");
2863   if (g_skip_count != 1)
2864     fail("Skip handler not invoked enough times");
2865   if (g_xdecl_count != 1)
2866     fail("XML declaration handler not invoked");
2867 }
2868 END_TEST
2869 
2870 /* Test that an explicit external entity handler argument replaces
2871  * the parser as the first argument.
2872  *
2873  * We do not call the first parameter to the external entity handler
2874  * 'parser' for once, since the first time the handler is called it
2875  * will actually be a text string.  We need to be able to access the
2876  * global 'parser' variable to create our external entity parser from,
2877  * since there are code paths we need to ensure get executed.
2878  */
START_TEST(test_ext_entity_ref_parameter)2879 START_TEST(test_ext_entity_ref_parameter) {
2880   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2881                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2882                      "<doc>&entity;</doc>";
2883 
2884   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2885   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2886   /* Set a handler arg that is not NULL and not parser (which is
2887    * what NULL would cause to be passed.
2888    */
2889   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2890   g_handler_data = text;
2891   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2892       == XML_STATUS_ERROR)
2893     xml_failure(g_parser);
2894 
2895   /* Now try again with unset args */
2896   XML_ParserReset(g_parser, NULL);
2897   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2898   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2899   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2900   g_handler_data = g_parser;
2901   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2902       == XML_STATUS_ERROR)
2903     xml_failure(g_parser);
2904 }
2905 END_TEST
2906 
2907 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2908 START_TEST(test_empty_parse) {
2909   const char *text = "<doc></doc>";
2910   const char *partial = "<doc>";
2911 
2912   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2913     fail("Parsing empty string faulted");
2914   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2915     fail("Parsing final empty string not faulted");
2916   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2917     fail("Parsing final empty string faulted for wrong reason");
2918 
2919   /* Now try with valid text before the empty end */
2920   XML_ParserReset(g_parser, NULL);
2921   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2922       == XML_STATUS_ERROR)
2923     xml_failure(g_parser);
2924   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2925     fail("Parsing final empty string faulted");
2926 
2927   /* Now try with invalid text before the empty end */
2928   XML_ParserReset(g_parser, NULL);
2929   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2930                               XML_FALSE)
2931       == XML_STATUS_ERROR)
2932     xml_failure(g_parser);
2933   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2934     fail("Parsing final incomplete empty string not faulted");
2935 }
2936 END_TEST
2937 
2938 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2939 START_TEST(test_negative_len_parse) {
2940   const char *const doc = "<root/>";
2941   for (int isFinal = 0; isFinal < 2; isFinal++) {
2942     set_subtest("isFinal=%d", isFinal);
2943 
2944     XML_Parser parser = XML_ParserCreate(NULL);
2945 
2946     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2947       fail("There was not supposed to be any initial parse error.");
2948 
2949     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2950 
2951     if (status != XML_STATUS_ERROR)
2952       fail("Negative len was expected to fail the parse but did not.");
2953 
2954     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2955       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2956 
2957     XML_ParserFree(parser);
2958   }
2959 }
2960 END_TEST
2961 
2962 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2963 START_TEST(test_negative_len_parse_buffer) {
2964   const char *const doc = "<root/>";
2965   for (int isFinal = 0; isFinal < 2; isFinal++) {
2966     set_subtest("isFinal=%d", isFinal);
2967 
2968     XML_Parser parser = XML_ParserCreate(NULL);
2969 
2970     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2971       fail("There was not supposed to be any initial parse error.");
2972 
2973     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2974 
2975     if (buffer == NULL)
2976       fail("XML_GetBuffer failed.");
2977 
2978     memcpy(buffer, doc, strlen(doc));
2979 
2980     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2981 
2982     if (status != XML_STATUS_ERROR)
2983       fail("Negative len was expected to fail the parse but did not.");
2984 
2985     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2986       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2987 
2988     XML_ParserFree(parser);
2989   }
2990 }
2991 END_TEST
2992 
2993 /* Test odd corners of the XML_GetBuffer interface */
2994 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2995 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2996   const XML_Feature *feature = XML_GetFeatureList();
2997 
2998   if (feature == NULL)
2999     return XML_STATUS_ERROR;
3000   for (; feature->feature != XML_FEATURE_END; feature++) {
3001     if (feature->feature == feature_id) {
3002       *presult = feature->value;
3003       return XML_STATUS_OK;
3004     }
3005   }
3006   return XML_STATUS_ERROR;
3007 }
3008 
3009 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)3010 START_TEST(test_get_buffer_1) {
3011   const char *text = get_buffer_test_text;
3012   void *buffer;
3013   long context_bytes;
3014 
3015   /* Attempt to allocate a negative length buffer */
3016   if (XML_GetBuffer(g_parser, -12) != NULL)
3017     fail("Negative length buffer not failed");
3018 
3019   /* Now get a small buffer and extend it past valid length */
3020   buffer = XML_GetBuffer(g_parser, 1536);
3021   if (buffer == NULL)
3022     fail("1.5K buffer failed");
3023   assert(buffer != NULL);
3024   memcpy(buffer, text, strlen(text));
3025   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3026       == XML_STATUS_ERROR)
3027     xml_failure(g_parser);
3028   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
3029     fail("INT_MAX buffer not failed");
3030 
3031   /* Now try extending it a more reasonable but still too large
3032    * amount.  The allocator in XML_GetBuffer() doubles the buffer
3033    * size until it exceeds the requested amount or INT_MAX.  If it
3034    * exceeds INT_MAX, it rejects the request, so we want a request
3035    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
3036    * with an extra byte just to ensure that the request is off any
3037    * boundary.  The request will be inflated internally by
3038    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
3039    * request.
3040    */
3041   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
3042     context_bytes = 0;
3043   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
3044     fail("INT_MAX- buffer not failed");
3045 
3046   /* Now try extending it a carefully crafted amount */
3047   if (XML_GetBuffer(g_parser, 1000) == NULL)
3048     fail("1000 buffer failed");
3049 }
3050 END_TEST
3051 
3052 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)3053 START_TEST(test_get_buffer_2) {
3054   const char *text = get_buffer_test_text;
3055   void *buffer;
3056 
3057   /* Now get a decent buffer */
3058   buffer = XML_GetBuffer(g_parser, 1536);
3059   if (buffer == NULL)
3060     fail("1.5K buffer failed");
3061   assert(buffer != NULL);
3062   memcpy(buffer, text, strlen(text));
3063   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3064       == XML_STATUS_ERROR)
3065     xml_failure(g_parser);
3066 
3067   /* Extend it, to catch a different code path */
3068   if (XML_GetBuffer(g_parser, 1024) == NULL)
3069     fail("1024 buffer failed");
3070 }
3071 END_TEST
3072 
/* Regression test for the signed integer overflow CVE-2022-23852 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const text = "\n";
  const int keep_len = (int)strlen(text);
  XML_Parser parser = XML_ParserCreate(NULL);
  enum XML_Status status;

  assert(parser != NULL);

  // Once this parse is done, variable "keep" in XML_GetBuffer will
  // have the value keep_len
  status = _XML_Parse_SINGLE_BYTES(parser, text, keep_len,
                                   XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR)
    xml_failure(parser);

  // Requesting INT_MAX - keep_len + 1 more bytes must be refused
  assert(keep_len > 0);
  if (XML_GetBuffer(parser, INT_MAX - keep_len + 1) != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
3097 
START_TEST(test_buffer_can_grow_to_max)3098 START_TEST(test_buffer_can_grow_to_max) {
3099   const char *const prefixes[] = {
3100       "",
3101       "<",
3102       "<x a='",
3103       "<doc><x a='",
3104       "<document><x a='",
3105       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
3106       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
3107       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
3108       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
3109       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
3110   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
3111   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
3112 #if defined(__MINGW32__) && ! defined(__MINGW64__)
3113   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
3114   // Can we make a big allocation?
3115   void *big = malloc(maxbuf);
3116   if (! big) {
3117     // The big allocation failed. Let's be a little lenient.
3118     maxbuf = maxbuf / 2;
3119   }
3120   free(big);
3121 #endif
3122 
3123   for (int i = 0; i < num_prefixes; ++i) {
3124     set_subtest("\"%s\"", prefixes[i]);
3125     XML_Parser parser = XML_ParserCreate(NULL);
3126 #if XML_GE == 1
3127     assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
3128                 == XML_TRUE); // i.e. deactivate
3129 #endif
3130     const int prefix_len = (int)strlen(prefixes[i]);
3131     const enum XML_Status s
3132         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3133     if (s != XML_STATUS_OK)
3134       xml_failure(parser);
3135 
3136     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3137     // subtracting the whole prefix is easiest, and close enough.
3138     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3139     // The limit should be consistent; no prefix should allow us to
3140     // reach above the max buffer size.
3141     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3142     XML_ParserFree(parser);
3143   }
3144 }
3145 END_TEST
3146 
START_TEST(test_getbuffer_allocates_on_zero_len)3147 START_TEST(test_getbuffer_allocates_on_zero_len) {
3148   for (int first_len = 1; first_len >= 0; first_len--) {
3149     set_subtest("with len=%d first", first_len);
3150     XML_Parser parser = XML_ParserCreate(NULL);
3151     assert_true(parser != NULL);
3152     assert_true(XML_GetBuffer(parser, first_len) != NULL);
3153     assert_true(XML_GetBuffer(parser, 0) != NULL);
3154     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3155       xml_failure(parser);
3156     XML_ParserFree(parser);
3157   }
3158 }
3159 END_TEST
3160 
3161 /* Test position information macros */
START_TEST(test_byte_info_at_end)3162 START_TEST(test_byte_info_at_end) {
3163   const char *text = "<doc></doc>";
3164 
3165   if (XML_GetCurrentByteIndex(g_parser) != -1
3166       || XML_GetCurrentByteCount(g_parser) != 0)
3167     fail("Byte index/count incorrect at start of parse");
3168   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3169       == XML_STATUS_ERROR)
3170     xml_failure(g_parser);
3171   /* At end, the count will be zero and the index the end of string */
3172   if (XML_GetCurrentByteCount(g_parser) != 0)
3173     fail("Terminal byte count incorrect");
3174   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3175     fail("Terminal byte index incorrect");
3176 }
3177 END_TEST
3178 
3179 /* Test position information from errors */
3180 #define PRE_ERROR_STR "<doc></"
3181 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3182 START_TEST(test_byte_info_at_error) {
3183   const char *text = PRE_ERROR_STR POST_ERROR_STR;
3184 
3185   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3186       == XML_STATUS_OK)
3187     fail("Syntax error not faulted");
3188   if (XML_GetCurrentByteCount(g_parser) != 0)
3189     fail("Error byte count incorrect");
3190   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3191     fail("Error byte index incorrect");
3192 }
3193 END_TEST
3194 #undef PRE_ERROR_STR
3195 #undef POST_ERROR_STR
3196 
3197 /* Test position information in handler */
3198 #define START_ELEMENT "<e>"
3199 #define CDATA_TEXT "Hello"
3200 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3201 START_TEST(test_byte_info_at_cdata) {
3202   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3203   int offset, size;
3204   ByteTestData data;
3205 
3206   /* Check initial context is empty */
3207   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3208     fail("Unexpected context at start of parse");
3209 
3210   data.start_element_len = (int)strlen(START_ELEMENT);
3211   data.cdata_len = (int)strlen(CDATA_TEXT);
3212   data.total_string_len = (int)strlen(text);
3213   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3214   XML_SetUserData(g_parser, &data);
3215   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3216     xml_failure(g_parser);
3217 }
3218 END_TEST
3219 #undef START_ELEMENT
3220 #undef CDATA_TEXT
3221 #undef END_ELEMENT
3222 
3223 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3224 START_TEST(test_predefined_entities) {
3225   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3226   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3227   const XML_Char *result = XCS("<>&\"'");
3228   CharData storage;
3229 
3230   XML_SetDefaultHandler(g_parser, accumulate_characters);
3231   /* run_character_check uses XML_SetCharacterDataHandler(), which
3232    * unfortunately heads off a code path that we need to exercise.
3233    */
3234   CharData_Init(&storage);
3235   XML_SetUserData(g_parser, &storage);
3236   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3237       == XML_STATUS_ERROR)
3238     xml_failure(g_parser);
3239   /* The default handler doesn't translate the entities */
3240   CharData_CheckXMLChars(&storage, expected);
3241 
3242   /* Now try again and check the translation */
3243   XML_ParserReset(g_parser, NULL);
3244   run_character_check(text, result);
3245 }
3246 END_TEST
3247 
3248 /* Regression test that an invalid tag in an external parameter
3249  * reference in an external DTD is correctly faulted.
3250  *
3251  * Only a few specific tags are legal in DTDs ignoring comments and
3252  * processing instructions, all of which begin with an exclamation
3253  * mark.  "<el/>" is not one of them, so the parser should raise an
3254  * error on encountering it.
3255  */
START_TEST(test_invalid_tag_in_dtd)3256 START_TEST(test_invalid_tag_in_dtd) {
3257   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3258                      "<doc></doc>\n";
3259 
3260   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3261   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3262   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3263                  "Invalid tag IN DTD external param not rejected");
3264 }
3265 END_TEST
3266 
3267 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3268 START_TEST(test_not_predefined_entities) {
3269   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3270                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3271   int i = 0;
3272 
3273   while (text[i] != NULL) {
3274     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3275                    "Undefined entity not rejected");
3276     XML_ParserReset(g_parser, NULL);
3277     i++;
3278   }
3279 }
3280 END_TEST
3281 
3282 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3283 START_TEST(test_ignore_section) {
3284   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3285                      "<doc><e>&entity;</e></doc>";
3286   const XML_Char *expected
3287       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3288   CharData storage;
3289 
3290   CharData_Init(&storage);
3291   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3292   XML_SetUserData(g_parser, &storage);
3293   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3294   XML_SetDefaultHandler(g_parser, accumulate_characters);
3295   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3296   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3297   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3298   XML_SetStartElementHandler(g_parser, dummy_start_element);
3299   XML_SetEndElementHandler(g_parser, dummy_end_element);
3300   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3301       == XML_STATUS_ERROR)
3302     xml_failure(g_parser);
3303   CharData_CheckXMLChars(&storage, expected);
3304 }
3305 END_TEST
3306 
START_TEST(test_ignore_section_utf16)3307 START_TEST(test_ignore_section_utf16) {
3308   const char text[] =
3309       /* <!DOCTYPE d SYSTEM 's'> */
3310       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3311       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3312       /* <d><e>&en;</e></d> */
3313       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3314   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3315   CharData storage;
3316 
3317   CharData_Init(&storage);
3318   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3319   XML_SetUserData(g_parser, &storage);
3320   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3321   XML_SetDefaultHandler(g_parser, accumulate_characters);
3322   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3323   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3324   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3325   XML_SetStartElementHandler(g_parser, dummy_start_element);
3326   XML_SetEndElementHandler(g_parser, dummy_end_element);
3327   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3328       == XML_STATUS_ERROR)
3329     xml_failure(g_parser);
3330   CharData_CheckXMLChars(&storage, expected);
3331 }
3332 END_TEST
3333 
START_TEST(test_ignore_section_utf16_be)3334 START_TEST(test_ignore_section_utf16_be) {
3335   const char text[] =
3336       /* <!DOCTYPE d SYSTEM 's'> */
3337       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3338       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3339       /* <d><e>&en;</e></d> */
3340       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3341   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3342   CharData storage;
3343 
3344   CharData_Init(&storage);
3345   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3346   XML_SetUserData(g_parser, &storage);
3347   XML_SetExternalEntityRefHandler(g_parser,
3348                                   external_entity_load_ignore_utf16_be);
3349   XML_SetDefaultHandler(g_parser, accumulate_characters);
3350   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3351   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3352   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3353   XML_SetStartElementHandler(g_parser, dummy_start_element);
3354   XML_SetEndElementHandler(g_parser, dummy_end_element);
3355   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3356       == XML_STATUS_ERROR)
3357     xml_failure(g_parser);
3358   CharData_CheckXMLChars(&storage, expected);
3359 }
3360 END_TEST
3361 
3362 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3363 START_TEST(test_bad_ignore_section) {
3364   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3365                      "<doc><e>&entity;</e></doc>";
3366   ExtFaults faults[]
3367       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3368           XML_ERROR_SYNTAX},
3369          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3370           XML_ERROR_INVALID_TOKEN},
3371          {/* FIrst two bytes of a three-byte char */
3372           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3373           XML_ERROR_PARTIAL_CHAR},
3374          {NULL, NULL, NULL, XML_ERROR_NONE}};
3375   ExtFaults *fault;
3376 
3377   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3378     set_subtest("%s", fault->parse_text);
3379     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3380     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3381     XML_SetUserData(g_parser, fault);
3382     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3383                    "Incomplete IGNORE section not failed");
3384     XML_ParserReset(g_parser, NULL);
3385   }
3386 }
3387 END_TEST
3388 
3389 struct bom_testdata {
3390   const char *external;
3391   int split;
3392   XML_Bool nested_callback_happened;
3393 };
3394 
3395 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3396 external_bom_checker(XML_Parser parser, const XML_Char *context,
3397                      const XML_Char *base, const XML_Char *systemId,
3398                      const XML_Char *publicId) {
3399   const char *text;
3400   UNUSED_P(base);
3401   UNUSED_P(systemId);
3402   UNUSED_P(publicId);
3403 
3404   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3405   if (ext_parser == NULL)
3406     fail("Could not create external entity parser");
3407 
3408   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3409     struct bom_testdata *const testdata
3410         = (struct bom_testdata *)XML_GetUserData(parser);
3411     const char *const external = testdata->external;
3412     const int split = testdata->split;
3413     testdata->nested_callback_happened = XML_TRUE;
3414 
3415     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3416         != XML_STATUS_OK) {
3417       xml_failure(ext_parser);
3418     }
3419     text = external + split; // the parse below will continue where we left off.
3420   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3421     text = "<!ELEMENT doc EMPTY>\n"
3422            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3423            "<!ENTITY % e2 '%e1;'>\n";
3424   } else {
3425     fail("unknown systemId");
3426   }
3427 
3428   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3429       != XML_STATUS_OK)
3430     xml_failure(ext_parser);
3431 
3432   XML_ParserFree(ext_parser);
3433   return XML_STATUS_OK;
3434 }
3435 
3436 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3437 START_TEST(test_external_bom_consumed) {
3438   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3439                            "<doc></doc>\n";
3440   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3441   const int len = (int)strlen(external);
3442   for (int split = 0; split <= len; ++split) {
3443     set_subtest("split at byte %d", split);
3444 
3445     struct bom_testdata testdata;
3446     testdata.external = external;
3447     testdata.split = split;
3448     testdata.nested_callback_happened = XML_FALSE;
3449 
3450     XML_Parser parser = XML_ParserCreate(NULL);
3451     if (parser == NULL) {
3452       fail("Couldn't create parser");
3453     }
3454     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3455     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3456     XML_SetUserData(parser, &testdata);
3457     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3458         == XML_STATUS_ERROR)
3459       xml_failure(parser);
3460     if (! testdata.nested_callback_happened) {
3461       fail("ref handler not called");
3462     }
3463     XML_ParserFree(parser);
3464   }
3465 }
3466 END_TEST
3467 
3468 /* Test recursive parsing */
START_TEST(test_external_entity_values)3469 START_TEST(test_external_entity_values) {
3470   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3471                      "<doc></doc>\n";
3472   ExtFaults data_004_2[] = {
3473       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3474       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3475        XML_ERROR_INVALID_TOKEN},
3476       {"'wombat", "Unterminated string not faulted", NULL,
3477        XML_ERROR_UNCLOSED_TOKEN},
3478       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3479        XML_ERROR_PARTIAL_CHAR},
3480       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3481       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3482        XML_ERROR_XML_DECL},
3483       {/* UTF-8 BOM */
3484        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3485        XML_ERROR_NONE},
3486       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3487        "Invalid token after text declaration not faulted", NULL,
3488        XML_ERROR_INVALID_TOKEN},
3489       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3490        "Unterminated string after text decl not faulted", NULL,
3491        XML_ERROR_UNCLOSED_TOKEN},
3492       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3493        "Partial UTF-8 character after text decl not faulted", NULL,
3494        XML_ERROR_PARTIAL_CHAR},
3495       {"%e1;", "Recursive parameter entity not faulted", NULL,
3496        XML_ERROR_RECURSIVE_ENTITY_REF},
3497       {NULL, NULL, NULL, XML_ERROR_NONE}};
3498   int i;
3499 
3500   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3501     set_subtest("%s", data_004_2[i].parse_text);
3502     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3503     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3504     XML_SetUserData(g_parser, &data_004_2[i]);
3505     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3506         == XML_STATUS_ERROR)
3507       xml_failure(g_parser);
3508     XML_ParserReset(g_parser, NULL);
3509   }
3510 }
3511 END_TEST
3512 
3513 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3514 START_TEST(test_ext_entity_not_standalone) {
3515   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3516                      "<doc></doc>";
3517 
3518   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3519   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3520   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3521                  "Standalone rejection not caught");
3522 }
3523 END_TEST
3524 
START_TEST(test_ext_entity_value_abort)3525 START_TEST(test_ext_entity_value_abort) {
3526   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3527                      "<doc></doc>\n";
3528 
3529   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3530   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3531   g_resumable = XML_FALSE;
3532   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3533       == XML_STATUS_ERROR)
3534     xml_failure(g_parser);
3535 }
3536 END_TEST
3537 
START_TEST(test_bad_public_doctype)3538 START_TEST(test_bad_public_doctype) {
3539   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3540                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3541                      "<doc></doc>";
3542 
3543   /* Setting a handler provokes a particular code path */
3544   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3545                             dummy_end_doctype_handler);
3546   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3547 }
3548 END_TEST
3549 
3550 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3551 START_TEST(test_attribute_enum_value) {
3552   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3553                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3554                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3555   ExtTest dtd_data
3556       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3557          "<!ELEMENT a EMPTY>\n"
3558          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3559          NULL, NULL};
3560   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3561 
3562   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3563   XML_SetUserData(g_parser, &dtd_data);
3564   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3565   /* An attribute list handler provokes a different code path */
3566   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3567   run_ext_character_check(text, &dtd_data, expected);
3568 }
3569 END_TEST
3570 
3571 /* Slightly bizarrely, the library seems to silently ignore entity
3572  * definitions for predefined entities, even when they are wrong.  The
3573  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3574  * to happen, so this is currently treated as acceptable.
3575  */
START_TEST(test_predefined_entity_redefinition)3576 START_TEST(test_predefined_entity_redefinition) {
3577   const char *text = "<!DOCTYPE doc [\n"
3578                      "<!ENTITY apos 'foo'>\n"
3579                      "]>\n"
3580                      "<doc>&apos;</doc>";
3581   run_character_check(text, XCS("'"));
3582 }
3583 END_TEST
3584 
3585 /* Test that the parser stops processing the DTD after an unresolved
3586  * parameter entity is encountered.
3587  */
START_TEST(test_dtd_stop_processing)3588 START_TEST(test_dtd_stop_processing) {
3589   const char *text = "<!DOCTYPE doc [\n"
3590                      "%foo;\n"
3591                      "<!ENTITY bar 'bas'>\n"
3592                      "]><doc/>";
3593 
3594   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3595   init_dummy_handlers();
3596   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3597       == XML_STATUS_ERROR)
3598     xml_failure(g_parser);
3599   if (get_dummy_handler_flags() != 0)
3600     fail("DTD processing still going after undefined PE");
3601 }
3602 END_TEST
3603 
3604 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3605 START_TEST(test_public_notation_no_sysid) {
3606   const char *text = "<!DOCTYPE doc [\n"
3607                      "<!NOTATION note PUBLIC 'foo'>\n"
3608                      "<!ELEMENT doc EMPTY>\n"
3609                      "]>\n<doc/>";
3610 
3611   init_dummy_handlers();
3612   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3613   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3614       == XML_STATUS_ERROR)
3615     xml_failure(g_parser);
3616   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3617     fail("Notation declaration handler not called");
3618 }
3619 END_TEST
3620 
START_TEST(test_nested_groups)3621 START_TEST(test_nested_groups) {
3622   const char *text
3623       = "<!DOCTYPE doc [\n"
3624         "<!ELEMENT doc "
3625         /* Sixteen elements per line */
3626         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3627         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3628         "))))))))))))))))))))))))))))))))>\n"
3629         "<!ELEMENT e EMPTY>"
3630         "]>\n"
3631         "<doc><e/></doc>";
3632   CharData storage;
3633 
3634   CharData_Init(&storage);
3635   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3636   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3637   XML_SetUserData(g_parser, &storage);
3638   init_dummy_handlers();
3639   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3640       == XML_STATUS_ERROR)
3641     xml_failure(g_parser);
3642   CharData_CheckXMLChars(&storage, XCS("doce"));
3643   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3644     fail("Element handler not fired");
3645 }
3646 END_TEST
3647 
START_TEST(test_group_choice)3648 START_TEST(test_group_choice) {
3649   const char *text = "<!DOCTYPE doc [\n"
3650                      "<!ELEMENT doc (a|b|c)+>\n"
3651                      "<!ELEMENT a EMPTY>\n"
3652                      "<!ELEMENT b (#PCDATA)>\n"
3653                      "<!ELEMENT c ANY>\n"
3654                      "]>\n"
3655                      "<doc>\n"
3656                      "<a/>\n"
3657                      "<b attr='foo'>This is a foo</b>\n"
3658                      "<c></c>\n"
3659                      "</doc>\n";
3660 
3661   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3662   init_dummy_handlers();
3663   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3664       == XML_STATUS_ERROR)
3665     xml_failure(g_parser);
3666   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3667     fail("Element handler flag not raised");
3668 }
3669 END_TEST
3670 
START_TEST(test_standalone_parameter_entity)3671 START_TEST(test_standalone_parameter_entity) {
3672   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3673                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3674                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3675                      "%entity;\n"
3676                      "]>\n"
3677                      "<doc></doc>";
3678   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3679 
3680   XML_SetUserData(g_parser, dtd_data);
3681   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3682   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3683   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3684       == XML_STATUS_ERROR)
3685     xml_failure(g_parser);
3686 }
3687 END_TEST
3688 
3689 /* Test skipping of parameter entity in an external DTD */
3690 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3691 START_TEST(test_skipped_parameter_entity) {
3692   const char *text = "<?xml version='1.0'?>\n"
3693                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3694                      "<!ELEMENT root (#PCDATA|a)* >\n"
3695                      "]>\n"
3696                      "<root></root>";
3697   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3698 
3699   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3700   XML_SetUserData(g_parser, &dtd_data);
3701   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3702   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3703   init_dummy_handlers();
3704   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3705       == XML_STATUS_ERROR)
3706     xml_failure(g_parser);
3707   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3708     fail("Skip handler not executed");
3709 }
3710 END_TEST
3711 
3712 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3713 START_TEST(test_recursive_external_parameter_entity) {
3714   const char *text = "<?xml version='1.0'?>\n"
3715                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3716                      "<!ELEMENT root (#PCDATA|a)* >\n"
3717                      "]>\n"
3718                      "<root></root>";
3719   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3720                         "Recursive external parameter entity not faulted", NULL,
3721                         XML_ERROR_RECURSIVE_ENTITY_REF};
3722 
3723   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3724   XML_SetUserData(g_parser, &dtd_data);
3725   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3726   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3727                  "Recursive external parameter not spotted");
3728 }
3729 END_TEST
3730 
3731 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3732 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3733   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3734                      "<doc></doc>\n";
3735 
3736   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3737   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3738   XML_SetUserData(g_parser, NULL);
3739   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3740       == XML_STATUS_ERROR)
3741     xml_failure(g_parser);
3742 
3743   /* Now repeat without the external entity ref handler invoking
3744    * another copy of itself.
3745    */
3746   XML_ParserReset(g_parser, NULL);
3747   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3748   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3749   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3750   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3751       == XML_STATUS_ERROR)
3752     xml_failure(g_parser);
3753 }
3754 END_TEST
3755 
3756 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3757 START_TEST(test_suspend_xdecl) {
3758   const char *text = long_character_data_text;
3759 
3760   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3761   XML_SetUserData(g_parser, g_parser);
3762   g_resumable = XML_TRUE;
3763   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3764   // we won't know exactly how much input we actually managed to give Expat.
3765   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3766       != XML_STATUS_SUSPENDED)
3767     xml_failure(g_parser);
3768   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3769     xml_failure(g_parser);
3770   /* Attempt to start a new parse while suspended */
3771   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3772       != XML_STATUS_ERROR)
3773     fail("Attempt to parse while suspended not faulted");
3774   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3775     fail("Suspended parse not faulted with correct error");
3776 }
3777 END_TEST
3778 
3779 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3780 START_TEST(test_abort_epilog) {
3781   const char *text = "<doc></doc>\n\r\n";
3782   XML_Char trigger_char = XCS('\r');
3783 
3784   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3785   XML_SetUserData(g_parser, &trigger_char);
3786   g_resumable = XML_FALSE;
3787   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3788       != XML_STATUS_ERROR)
3789     fail("Abort not triggered");
3790   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3791     xml_failure(g_parser);
3792 }
3793 END_TEST
3794 
3795 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3796 START_TEST(test_abort_epilog_2) {
3797   const char *text = "<doc></doc>\n";
3798   XML_Char trigger_char = XCS('\n');
3799 
3800   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3801   XML_SetUserData(g_parser, &trigger_char);
3802   g_resumable = XML_FALSE;
3803   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3804 }
3805 END_TEST
3806 
3807 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3808 START_TEST(test_suspend_epilog) {
3809   const char *text = "<doc></doc>\n";
3810   XML_Char trigger_char = XCS('\n');
3811 
3812   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3813   XML_SetUserData(g_parser, &trigger_char);
3814   g_resumable = XML_TRUE;
3815   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3816       != XML_STATUS_SUSPENDED)
3817     xml_failure(g_parser);
3818 }
3819 END_TEST
3820 
START_TEST(test_suspend_in_sole_empty_tag)3821 START_TEST(test_suspend_in_sole_empty_tag) {
3822   const char *text = "<doc/>";
3823   enum XML_Status rc;
3824 
3825   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3826   XML_SetUserData(g_parser, g_parser);
3827   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3828   if (rc == XML_STATUS_ERROR)
3829     xml_failure(g_parser);
3830   else if (rc != XML_STATUS_SUSPENDED)
3831     fail("Suspend not triggered");
3832   rc = XML_ResumeParser(g_parser);
3833   if (rc == XML_STATUS_ERROR)
3834     xml_failure(g_parser);
3835   else if (rc != XML_STATUS_OK)
3836     fail("Resume failed");
3837 }
3838 END_TEST
3839 
START_TEST(test_unfinished_epilog)3840 START_TEST(test_unfinished_epilog) {
3841   const char *text = "<doc></doc><";
3842 
3843   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3844                  "Incomplete epilog entry not faulted");
3845 }
3846 END_TEST
3847 
START_TEST(test_partial_char_in_epilog)3848 START_TEST(test_partial_char_in_epilog) {
3849   const char *text = "<doc></doc>\xe2\x82";
3850 
3851   /* First check that no fault is raised if the parse is not finished */
3852   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3853       == XML_STATUS_ERROR)
3854     xml_failure(g_parser);
3855   /* Now check that it is faulted once we finish */
3856   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3857     fail("Partial character in epilog not faulted");
3858   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3859     xml_failure(g_parser);
3860 }
3861 END_TEST
3862 
3863 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3864 START_TEST(test_suspend_resume_internal_entity) {
3865   const char *text
3866       = "<!DOCTYPE doc [\n"
3867         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3868         "]>\n"
3869         "<doc>&foo;</doc>\n";
3870   const XML_Char *expected1 = XCS("Hi");
3871   const XML_Char *expected2 = XCS("HiHo");
3872   CharData storage;
3873 
3874   CharData_Init(&storage);
3875   XML_SetStartElementHandler(g_parser, start_element_suspender);
3876   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3877   XML_SetUserData(g_parser, &storage);
3878   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3879   // we won't know exactly how much input we actually managed to give Expat.
3880   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3881       != XML_STATUS_SUSPENDED)
3882     xml_failure(g_parser);
3883   CharData_CheckXMLChars(&storage, XCS(""));
3884   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3885     xml_failure(g_parser);
3886   CharData_CheckXMLChars(&storage, expected1);
3887   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3888     xml_failure(g_parser);
3889   CharData_CheckXMLChars(&storage, expected2);
3890 }
3891 END_TEST
3892 
START_TEST(test_suspend_resume_internal_entity_issue_629)3893 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3894   const char *const text
3895       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3896         "<"
3897         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3898         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3899         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3900         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3901         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3902         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3903         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3904         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3905         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3906         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3907         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3908         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3909         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3910         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3911         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3912         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3913         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3914         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3915         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3916         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3917         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3918         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3919         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3920         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3921         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3922         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3923         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3924         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3925         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3926         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3927         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3928         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3929         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3930         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3931         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3932         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3933         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3934         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3935         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3936         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3937         "/>"
3938         "</b></a>";
3939   const size_t firstChunkSizeBytes = 54;
3940 
3941   XML_Parser parser = XML_ParserCreate(NULL);
3942   XML_SetUserData(parser, parser);
3943   XML_SetCommentHandler(parser, suspending_comment_handler);
3944 
3945   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3946       != XML_STATUS_SUSPENDED)
3947     xml_failure(parser);
3948   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3949     xml_failure(parser);
3950   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3951                               (int)(strlen(text) - firstChunkSizeBytes),
3952                               XML_TRUE)
3953       != XML_STATUS_OK)
3954     xml_failure(parser);
3955   XML_ParserFree(parser);
3956 }
3957 END_TEST
3958 
3959 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3960 START_TEST(test_resume_entity_with_syntax_error) {
3961   if (g_chunkSize != 0) {
3962     // this test does not use SINGLE_BYTES, because of suspension
3963     return;
3964   }
3965 
3966   const char *text = "<!DOCTYPE doc [\n"
3967                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3968                      "]>\n"
3969                      "<doc>&foo;</doc>\n";
3970 
3971   XML_SetStartElementHandler(g_parser, start_element_suspender);
3972   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3973   // we won't know exactly how much input we actually managed to give Expat.
3974   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3975       != XML_STATUS_SUSPENDED)
3976     xml_failure(g_parser);
3977   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3978     fail("Syntax error in entity not faulted");
3979   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3980     xml_failure(g_parser);
3981 }
3982 END_TEST
3983 
3984 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3985 START_TEST(test_suspend_resume_parameter_entity) {
3986   const char *text = "<!DOCTYPE doc [\n"
3987                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3988                      "%foo;\n"
3989                      "]>\n"
3990                      "<doc>Hello, world</doc>";
3991   const XML_Char *expected = XCS("Hello, world");
3992   CharData storage;
3993 
3994   CharData_Init(&storage);
3995   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3996   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3997   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3998   XML_SetUserData(g_parser, &storage);
3999   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4000       != XML_STATUS_SUSPENDED)
4001     xml_failure(g_parser);
4002   CharData_CheckXMLChars(&storage, XCS(""));
4003   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4004     xml_failure(g_parser);
4005   CharData_CheckXMLChars(&storage, expected);
4006 }
4007 END_TEST
4008 
4009 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)4010 START_TEST(test_restart_on_error) {
4011   const char *text = "<$doc><doc></doc>";
4012 
4013   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4014       != XML_STATUS_ERROR)
4015     fail("Invalid tag name not faulted");
4016   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4017     xml_failure(g_parser);
4018   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
4019     fail("Restarting invalid parse not faulted");
4020   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4021     xml_failure(g_parser);
4022 }
4023 END_TEST
4024 
4025 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)4026 START_TEST(test_reject_lt_in_attribute_value) {
4027   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
4028                      "<doc></doc>";
4029 
4030   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4031                  "Bad attribute default not faulted");
4032 }
4033 END_TEST
4034 
START_TEST(test_reject_unfinished_param_in_att_value)4035 START_TEST(test_reject_unfinished_param_in_att_value) {
4036   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
4037                      "<doc></doc>";
4038 
4039   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4040                  "Bad attribute default not faulted");
4041 }
4042 END_TEST
4043 
START_TEST(test_trailing_cr_in_att_value)4044 START_TEST(test_trailing_cr_in_att_value) {
4045   const char *text = "<doc a='value\r'/>";
4046 
4047   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4048       == XML_STATUS_ERROR)
4049     xml_failure(g_parser);
4050 }
4051 END_TEST
4052 
4053 /* Try parsing a general entity within a parameter entity in a
4054  * standalone internal DTD.  Covers a corner case in the parser.
4055  */
START_TEST(test_standalone_internal_entity)4056 START_TEST(test_standalone_internal_entity) {
4057   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
4058                      "<!DOCTYPE doc [\n"
4059                      "  <!ELEMENT doc (#PCDATA)>\n"
4060                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
4061                      "  <!ENTITY ge 'AttDefaultValue'>\n"
4062                      "  %pe;\n"
4063                      "]>\n"
4064                      "<doc att2='any'/>";
4065 
4066   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4067   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4068       == XML_STATUS_ERROR)
4069     xml_failure(g_parser);
4070 }
4071 END_TEST
4072 
4073 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)4074 START_TEST(test_skipped_external_entity) {
4075   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4076                      "<doc></doc>\n";
4077   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
4078                        "<!ENTITY % e2 '%e1;'>\n",
4079                        NULL, NULL};
4080 
4081   XML_SetUserData(g_parser, &test_data);
4082   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4083   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4084   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4085       == XML_STATUS_ERROR)
4086     xml_failure(g_parser);
4087 }
4088 END_TEST
4089 
4090 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)4091 START_TEST(test_skipped_null_loaded_ext_entity) {
4092   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4093                      "<doc />";
4094   ExtHdlrData test_data
4095       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4096          "<!ENTITY % pe2 '%pe1;'>\n"
4097          "%pe2;\n",
4098          external_entity_null_loader, NULL};
4099 
4100   XML_SetUserData(g_parser, &test_data);
4101   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4102   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4103   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4104       == XML_STATUS_ERROR)
4105     xml_failure(g_parser);
4106 }
4107 END_TEST
4108 
START_TEST(test_skipped_unloaded_ext_entity)4109 START_TEST(test_skipped_unloaded_ext_entity) {
4110   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4111                      "<doc />";
4112   ExtHdlrData test_data
4113       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4114          "<!ENTITY % pe2 '%pe1;'>\n"
4115          "%pe2;\n",
4116          NULL, NULL};
4117 
4118   XML_SetUserData(g_parser, &test_data);
4119   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4120   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4121   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4122       == XML_STATUS_ERROR)
4123     xml_failure(g_parser);
4124 }
4125 END_TEST
4126 
4127 /* Test that a parameter entity value ending with a carriage return
4128  * has it translated internally into a newline.
4129  */
START_TEST(test_param_entity_with_trailing_cr)4130 START_TEST(test_param_entity_with_trailing_cr) {
4131 #define PARAM_ENTITY_NAME "pe"
4132 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
4133   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4134                      "<doc/>";
4135   ExtTest test_data
4136       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4137          "%" PARAM_ENTITY_NAME ";\n",
4138          NULL, NULL};
4139 
4140   XML_SetUserData(g_parser, &test_data);
4141   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4142   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4143   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4144   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4145                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4146   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4147       == XML_STATUS_ERROR)
4148     xml_failure(g_parser);
4149   int entity_match_flag = get_param_entity_match_flag();
4150   if (entity_match_flag == ENTITY_MATCH_FAIL)
4151     fail("Parameter entity CR->NEWLINE conversion failed");
4152   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4153     fail("Parameter entity not parsed");
4154 }
4155 #undef PARAM_ENTITY_NAME
4156 #undef PARAM_ENTITY_CORE_VALUE
4157 END_TEST
4158 
START_TEST(test_invalid_character_entity)4159 START_TEST(test_invalid_character_entity) {
4160   const char *text = "<!DOCTYPE doc [\n"
4161                      "  <!ENTITY entity '&#x110000;'>\n"
4162                      "]>\n"
4163                      "<doc>&entity;</doc>";
4164 
4165   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4166                  "Out of range character reference not faulted");
4167 }
4168 END_TEST
4169 
START_TEST(test_invalid_character_entity_2)4170 START_TEST(test_invalid_character_entity_2) {
4171   const char *text = "<!DOCTYPE doc [\n"
4172                      "  <!ENTITY entity '&#xg0;'>\n"
4173                      "]>\n"
4174                      "<doc>&entity;</doc>";
4175 
4176   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4177                  "Out of range character reference not faulted");
4178 }
4179 END_TEST
4180 
START_TEST(test_invalid_character_entity_3)4181 START_TEST(test_invalid_character_entity_3) {
4182   const char text[] =
4183       /* <!DOCTYPE doc [\n */
4184       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4185       /* U+0E04 = KHO KHWAI
4186        * U+0E08 = CHO CHAN */
4187       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4188       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4189       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4190       /* ]>\n */
4191       "\0]\0>\0\n"
4192       /* <doc>&entity;</doc> */
4193       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4194 
4195   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4196       != XML_STATUS_ERROR)
4197     fail("Invalid start of entity name not faulted");
4198   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4199     xml_failure(g_parser);
4200 }
4201 END_TEST
4202 
START_TEST(test_invalid_character_entity_4)4203 START_TEST(test_invalid_character_entity_4) {
4204   const char *text = "<!DOCTYPE doc [\n"
4205                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4206                      "]>\n"
4207                      "<doc>&entity;</doc>";
4208 
4209   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4210                  "Out of range character reference not faulted");
4211 }
4212 END_TEST
4213 
4214 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4215 START_TEST(test_pi_handled_in_default) {
4216   const char *text = "<?test processing instruction?>\n<doc/>";
4217   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4218   CharData storage;
4219 
4220   CharData_Init(&storage);
4221   XML_SetDefaultHandler(g_parser, accumulate_characters);
4222   XML_SetUserData(g_parser, &storage);
4223   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4224       == XML_STATUS_ERROR)
4225     xml_failure(g_parser);
4226   CharData_CheckXMLChars(&storage, expected);
4227 }
4228 END_TEST
4229 
4230 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4231 START_TEST(test_comment_handled_in_default) {
4232   const char *text = "<!-- This is a comment -->\n<doc/>";
4233   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4234   CharData storage;
4235 
4236   CharData_Init(&storage);
4237   XML_SetDefaultHandler(g_parser, accumulate_characters);
4238   XML_SetUserData(g_parser, &storage);
4239   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4240       == XML_STATUS_ERROR)
4241     xml_failure(g_parser);
4242   CharData_CheckXMLChars(&storage, expected);
4243 }
4244 END_TEST
4245 
4246 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4247 START_TEST(test_pi_yml) {
4248   const char *text = "<?yml something like data?><doc/>";
4249   const XML_Char *expected = XCS("yml: something like data\n");
4250   CharData storage;
4251 
4252   CharData_Init(&storage);
4253   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4254   XML_SetUserData(g_parser, &storage);
4255   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4256       == XML_STATUS_ERROR)
4257     xml_failure(g_parser);
4258   CharData_CheckXMLChars(&storage, expected);
4259 }
4260 END_TEST
4261 
START_TEST(test_pi_xnl)4262 START_TEST(test_pi_xnl) {
4263   const char *text = "<?xnl nothing like data?><doc/>";
4264   const XML_Char *expected = XCS("xnl: nothing like data\n");
4265   CharData storage;
4266 
4267   CharData_Init(&storage);
4268   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4269   XML_SetUserData(g_parser, &storage);
4270   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4271       == XML_STATUS_ERROR)
4272     xml_failure(g_parser);
4273   CharData_CheckXMLChars(&storage, expected);
4274 }
4275 END_TEST
4276 
START_TEST(test_pi_xmm)4277 START_TEST(test_pi_xmm) {
4278   const char *text = "<?xmm everything like data?><doc/>";
4279   const XML_Char *expected = XCS("xmm: everything like data\n");
4280   CharData storage;
4281 
4282   CharData_Init(&storage);
4283   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4284   XML_SetUserData(g_parser, &storage);
4285   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4286       == XML_STATUS_ERROR)
4287     xml_failure(g_parser);
4288   CharData_CheckXMLChars(&storage, expected);
4289 }
4290 END_TEST
4291 
START_TEST(test_utf16_pi)4292 START_TEST(test_utf16_pi) {
4293   const char text[] =
4294       /* <?{KHO KHWAI}{CHO CHAN}?>
4295        * where {KHO KHWAI} = U+0E04
4296        * and   {CHO CHAN}  = U+0E08
4297        */
4298       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4299       /* <q/> */
4300       "<\0q\0/\0>\0";
4301 #ifdef XML_UNICODE
4302   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4303 #else
4304   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4305 #endif
4306   CharData storage;
4307 
4308   CharData_Init(&storage);
4309   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4310   XML_SetUserData(g_parser, &storage);
4311   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4312       == XML_STATUS_ERROR)
4313     xml_failure(g_parser);
4314   CharData_CheckXMLChars(&storage, expected);
4315 }
4316 END_TEST
4317 
START_TEST(test_utf16_be_pi)4318 START_TEST(test_utf16_be_pi) {
4319   const char text[] =
4320       /* <?{KHO KHWAI}{CHO CHAN}?>
4321        * where {KHO KHWAI} = U+0E04
4322        * and   {CHO CHAN}  = U+0E08
4323        */
4324       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4325       /* <q/> */
4326       "\0<\0q\0/\0>";
4327 #ifdef XML_UNICODE
4328   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4329 #else
4330   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4331 #endif
4332   CharData storage;
4333 
4334   CharData_Init(&storage);
4335   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4336   XML_SetUserData(g_parser, &storage);
4337   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4338       == XML_STATUS_ERROR)
4339     xml_failure(g_parser);
4340   CharData_CheckXMLChars(&storage, expected);
4341 }
4342 END_TEST
4343 
4344 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4345 START_TEST(test_utf16_be_comment) {
4346   const char text[] =
4347       /* <!-- Comment A --> */
4348       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4349       /* <doc/> */
4350       "\0<\0d\0o\0c\0/\0>";
4351   const XML_Char *expected = XCS(" Comment A ");
4352   CharData storage;
4353 
4354   CharData_Init(&storage);
4355   XML_SetCommentHandler(g_parser, accumulate_comment);
4356   XML_SetUserData(g_parser, &storage);
4357   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4358       == XML_STATUS_ERROR)
4359     xml_failure(g_parser);
4360   CharData_CheckXMLChars(&storage, expected);
4361 }
4362 END_TEST
4363 
START_TEST(test_utf16_le_comment)4364 START_TEST(test_utf16_le_comment) {
4365   const char text[] =
4366       /* <!-- Comment B --> */
4367       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4368       /* <doc/> */
4369       "<\0d\0o\0c\0/\0>\0";
4370   const XML_Char *expected = XCS(" Comment B ");
4371   CharData storage;
4372 
4373   CharData_Init(&storage);
4374   XML_SetCommentHandler(g_parser, accumulate_comment);
4375   XML_SetUserData(g_parser, &storage);
4376   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4377       == XML_STATUS_ERROR)
4378     xml_failure(g_parser);
4379   CharData_CheckXMLChars(&storage, expected);
4380 }
4381 END_TEST
4382 
4383 /* Test that the unknown encoding handler with map entries that expect
4384  * conversion but no conversion function is faulted
4385  */
START_TEST(test_missing_encoding_conversion_fn)4386 START_TEST(test_missing_encoding_conversion_fn) {
4387   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4388                      "<doc>\x81</doc>";
4389 
4390   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4391   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4392    * character introducing a two-byte sequence.  For this, it
4393    * requires a convert function.  The above function call doesn't
4394    * pass one through, so when BadEncodingHandler actually gets
4395    * called it should supply an invalid encoding.
4396    */
4397   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4398                  "Encoding with missing convert() not faulted");
4399 }
4400 END_TEST
4401 
START_TEST(test_failing_encoding_conversion_fn)4402 START_TEST(test_failing_encoding_conversion_fn) {
4403   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4404                      "<doc>\x81</doc>";
4405 
4406   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4407   /* BadEncodingHandler sets up an encoding with every top-bit-set
4408    * character introducing a two-byte sequence.  For this, it
4409    * requires a convert function.  The above function call passes
4410    * one that insists all possible sequences are invalid anyway.
4411    */
4412   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4413                  "Encoding with failing convert() not faulted");
4414 }
4415 END_TEST
4416 
4417 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4418 START_TEST(test_unknown_encoding_success) {
4419   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4420                      /* Equivalent to <eoc>Hello, world</eoc> */
4421                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4422 
4423   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4424   run_character_check(text, XCS("Hello, world"));
4425 }
4426 END_TEST
4427 
4428 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4429 START_TEST(test_unknown_encoding_bad_name) {
4430   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4431                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4432 
4433   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4434   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4435                  "Bad name start in unknown encoding not faulted");
4436 }
4437 END_TEST
4438 
4439 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4440 START_TEST(test_unknown_encoding_bad_name_2) {
4441   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4442                      "<d\xffoc>Hello, world</d\xffoc>";
4443 
4444   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4445   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4446                  "Bad name in unknown encoding not faulted");
4447 }
4448 END_TEST
4449 
4450 /* Test element name that is long enough to fill the conversion buffer
4451  * in an unknown encoding, finishing with an encoded character.
4452  */
START_TEST(test_unknown_encoding_long_name_1)4453 START_TEST(test_unknown_encoding_long_name_1) {
4454   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4455                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4456                      "Hi"
4457                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4458   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4459   CharData storage;
4460 
4461   CharData_Init(&storage);
4462   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4463   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4464   XML_SetUserData(g_parser, &storage);
4465   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4466       == XML_STATUS_ERROR)
4467     xml_failure(g_parser);
4468   CharData_CheckXMLChars(&storage, expected);
4469 }
4470 END_TEST
4471 
/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
 */
START_TEST(test_unknown_encoding_long_name_2)4475 START_TEST(test_unknown_encoding_long_name_2) {
4476   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4477                      "<abcdefghabcdefghabcdefghijklmnop>"
4478                      "Hi"
4479                      "</abcdefghabcdefghabcdefghijklmnop>";
4480   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4481   CharData storage;
4482 
4483   CharData_Init(&storage);
4484   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4485   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4486   XML_SetUserData(g_parser, &storage);
4487   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4488       == XML_STATUS_ERROR)
4489     xml_failure(g_parser);
4490   CharData_CheckXMLChars(&storage, expected);
4491 }
4492 END_TEST
4493 
START_TEST(test_invalid_unknown_encoding)4494 START_TEST(test_invalid_unknown_encoding) {
4495   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4496                      "<doc>Hello world</doc>";
4497 
4498   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4499   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4500                  "Invalid unknown encoding not faulted");
4501 }
4502 END_TEST
4503 
START_TEST(test_unknown_ascii_encoding_ok)4504 START_TEST(test_unknown_ascii_encoding_ok) {
4505   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4506                      "<doc>Hello, world</doc>";
4507 
4508   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4509   run_character_check(text, XCS("Hello, world"));
4510 }
4511 END_TEST
4512 
START_TEST(test_unknown_ascii_encoding_fail)4513 START_TEST(test_unknown_ascii_encoding_fail) {
4514   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4515                      "<doc>Hello, \x80 world</doc>";
4516 
4517   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4518   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4519                  "Invalid character not faulted");
4520 }
4521 END_TEST
4522 
START_TEST(test_unknown_encoding_invalid_length)4523 START_TEST(test_unknown_encoding_invalid_length) {
4524   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4525                      "<doc>Hello, world</doc>";
4526 
4527   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4528   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4529                  "Invalid unknown encoding not faulted");
4530 }
4531 END_TEST
4532 
START_TEST(test_unknown_encoding_invalid_topbit)4533 START_TEST(test_unknown_encoding_invalid_topbit) {
4534   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4535                      "<doc>Hello, world</doc>";
4536 
4537   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4538   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4539                  "Invalid unknown encoding not faulted");
4540 }
4541 END_TEST
4542 
START_TEST(test_unknown_encoding_invalid_surrogate)4543 START_TEST(test_unknown_encoding_invalid_surrogate) {
4544   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4545                      "<doc>Hello, \x82 world</doc>";
4546 
4547   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4548   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4549                  "Invalid unknown encoding not faulted");
4550 }
4551 END_TEST
4552 
START_TEST(test_unknown_encoding_invalid_high)4553 START_TEST(test_unknown_encoding_invalid_high) {
4554   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4555                      "<doc>Hello, world</doc>";
4556 
4557   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4558   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4559                  "Invalid unknown encoding not faulted");
4560 }
4561 END_TEST
4562 
START_TEST(test_unknown_encoding_invalid_attr_value)4563 START_TEST(test_unknown_encoding_invalid_attr_value) {
4564   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4565                      "<doc attr='\xff\x30'/>";
4566 
4567   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4568   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4569                  "Invalid attribute valid not faulted");
4570 }
4571 END_TEST
4572 
/* Test that an external entity parser explicitly set to use latin-1
 * handles a leading UTF-16 BOM correctly: the BOM must not select
 * UTF-16 given the explicit encoding, so its bytes are decoded as
 * ordinary latin-1 characters.
 */
START_TEST(test_ext_entity_latin1_utf16le_bom)4577 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4578   const char *text = "<!DOCTYPE doc [\n"
4579                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4580                      "]>\n"
4581                      "<doc>&en;</doc>";
4582   ExtTest2 test_data
4583       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4584          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4585           *   0x4c = L and 0x20 is a space
4586           */
4587          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4588 #ifdef XML_UNICODE
4589   const XML_Char *expected = XCS("\x00ff\x00feL ");
4590 #else
4591   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4592   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4593 #endif
4594   CharData storage;
4595 
4596   CharData_Init(&storage);
4597   test_data.storage = &storage;
4598   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4599   XML_SetUserData(g_parser, &test_data);
4600   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4601   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4602       == XML_STATUS_ERROR)
4603     xml_failure(g_parser);
4604   CharData_CheckXMLChars(&storage, expected);
4605 }
4606 END_TEST
4607 
START_TEST(test_ext_entity_latin1_utf16be_bom)4608 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4609   const char *text = "<!DOCTYPE doc [\n"
4610                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4611                      "]>\n"
4612                      "<doc>&en;</doc>";
4613   ExtTest2 test_data
4614       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4615          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4616           *   0x4c = L and 0x20 is a space
4617           */
4618          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4619 #ifdef XML_UNICODE
4620   const XML_Char *expected = XCS("\x00fe\x00ff L");
4621 #else
4622   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4623   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4624 #endif
4625   CharData storage;
4626 
4627   CharData_Init(&storage);
4628   test_data.storage = &storage;
4629   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4630   XML_SetUserData(g_parser, &test_data);
4631   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4632   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4633       == XML_STATUS_ERROR)
4634     xml_failure(g_parser);
4635   CharData_CheckXMLChars(&storage, expected);
4636 }
4637 END_TEST
4638 
4639 /* Parsing the full buffer rather than a byte at a time makes a
4640  * difference to the encoding scanning code, so repeat the above tests
4641  * without breaking them down by byte.
4642  */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4643 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4644   const char *text = "<!DOCTYPE doc [\n"
4645                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4646                      "]>\n"
4647                      "<doc>&en;</doc>";
4648   ExtTest2 test_data
4649       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4650          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4651           *   0x4c = L and 0x20 is a space
4652           */
4653          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4654 #ifdef XML_UNICODE
4655   const XML_Char *expected = XCS("\x00ff\x00feL ");
4656 #else
4657   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4658   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4659 #endif
4660   CharData storage;
4661 
4662   CharData_Init(&storage);
4663   test_data.storage = &storage;
4664   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4665   XML_SetUserData(g_parser, &test_data);
4666   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4667   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4668       == XML_STATUS_ERROR)
4669     xml_failure(g_parser);
4670   CharData_CheckXMLChars(&storage, expected);
4671 }
4672 END_TEST
4673 
START_TEST(test_ext_entity_latin1_utf16be_bom2)4674 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4675   const char *text = "<!DOCTYPE doc [\n"
4676                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4677                      "]>\n"
4678                      "<doc>&en;</doc>";
4679   ExtTest2 test_data
4680       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4681          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4682           *   0x4c = L and 0x20 is a space
4683           */
4684          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4685 #ifdef XML_UNICODE
4686   const XML_Char *expected = XCS("\x00fe\x00ff L");
4687 #else
4688   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4689   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4690 #endif
4691   CharData storage;
4692 
4693   CharData_Init(&storage);
4694   test_data.storage = &storage;
4695   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4696   XML_SetUserData(g_parser, &test_data);
4697   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4698   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4699       == XML_STATUS_ERROR)
4700     xml_failure(g_parser);
4701   CharData_CheckXMLChars(&storage, expected);
4702 }
4703 END_TEST
4704 
4705 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4706 START_TEST(test_ext_entity_utf16_be) {
4707   const char *text = "<!DOCTYPE doc [\n"
4708                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4709                      "]>\n"
4710                      "<doc>&en;</doc>";
4711   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4712 #ifdef XML_UNICODE
4713   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4714 #else
4715   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4716                                  "\xe6\x94\x80"   /* U+6500 */
4717                                  "\xe2\xbc\x80"   /* U+2F00 */
4718                                  "\xe3\xb8\x80"); /* U+3E00 */
4719 #endif
4720   CharData storage;
4721 
4722   CharData_Init(&storage);
4723   test_data.storage = &storage;
4724   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4725   XML_SetUserData(g_parser, &test_data);
4726   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4727   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4728       == XML_STATUS_ERROR)
4729     xml_failure(g_parser);
4730   CharData_CheckXMLChars(&storage, expected);
4731 }
4732 END_TEST
4733 
4734 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4735 START_TEST(test_ext_entity_utf16_le) {
4736   const char *text = "<!DOCTYPE doc [\n"
4737                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4738                      "]>\n"
4739                      "<doc>&en;</doc>";
4740   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4741 #ifdef XML_UNICODE
4742   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4743 #else
4744   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4745                                  "\xe6\x94\x80"   /* U+6500 */
4746                                  "\xe2\xbc\x80"   /* U+2F00 */
4747                                  "\xe3\xb8\x80"); /* U+3E00 */
4748 #endif
4749   CharData storage;
4750 
4751   CharData_Init(&storage);
4752   test_data.storage = &storage;
4753   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4754   XML_SetUserData(g_parser, &test_data);
4755   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4756   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4757       == XML_STATUS_ERROR)
4758     xml_failure(g_parser);
4759   CharData_CheckXMLChars(&storage, expected);
4760 }
4761 END_TEST
4762 
4763 /* Test little-endian UTF-16 given no explicit encoding.
4764  * The existing default encoding (UTF-8) is assumed to hold without a
4765  * BOM to contradict it, so the entity value will in fact provoke an
4766  * error because 0x00 is not a valid XML character.  We parse the
4767  * whole buffer in one go rather than feeding it in byte by byte to
4768  * exercise different code paths in the initial scanning routines.
4769  */
START_TEST(test_ext_entity_utf16_unknown)4770 START_TEST(test_ext_entity_utf16_unknown) {
4771   const char *text = "<!DOCTYPE doc [\n"
4772                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4773                      "]>\n"
4774                      "<doc>&en;</doc>";
4775   ExtFaults2 test_data
4776       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4777          XML_ERROR_INVALID_TOKEN};
4778 
4779   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4780   XML_SetUserData(g_parser, &test_data);
4781   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4782                  "Invalid character should not have been accepted");
4783 }
4784 END_TEST
4785 
4786 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4787 START_TEST(test_ext_entity_utf8_non_bom) {
4788   const char *text = "<!DOCTYPE doc [\n"
4789                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4790                      "]>\n"
4791                      "<doc>&en;</doc>";
4792   ExtTest2 test_data
4793       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4794          3, NULL, NULL};
4795 #ifdef XML_UNICODE
4796   const XML_Char *expected = XCS("\xfec0");
4797 #else
4798   const XML_Char *expected = XCS("\xef\xbb\x80");
4799 #endif
4800   CharData storage;
4801 
4802   CharData_Init(&storage);
4803   test_data.storage = &storage;
4804   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4805   XML_SetUserData(g_parser, &test_data);
4806   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4807   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4808       == XML_STATUS_ERROR)
4809     xml_failure(g_parser);
4810   CharData_CheckXMLChars(&storage, expected);
4811 }
4812 END_TEST
4813 
4814 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4815 START_TEST(test_utf8_in_cdata_section) {
4816   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4817 #ifdef XML_UNICODE
4818   const XML_Char *expected = XCS("one \x00e9 two");
4819 #else
4820   const XML_Char *expected = XCS("one \xc3\xa9 two");
4821 #endif
4822 
4823   run_character_check(text, expected);
4824 }
4825 END_TEST
4826 
/* Test that UTF-8 on either side of a ']' in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)4828 START_TEST(test_utf8_in_cdata_section_2) {
4829   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4830 #ifdef XML_UNICODE
4831   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4832 #else
4833   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4834 #endif
4835 
4836   run_character_check(text, expected);
4837 }
4838 END_TEST
4839 
START_TEST(test_utf8_in_start_tags)4840 START_TEST(test_utf8_in_start_tags) {
4841   struct test_case {
4842     bool goodName;
4843     bool goodNameStart;
4844     const char *tagName;
4845   };
4846 
4847   // The idea with the tests below is this:
4848   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4849   // go to isNever and are hence not a concern.
4850   //
4851   // We start with a character that is a valid name character
4852   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4853   // single bits at places where (1) the result leaves the UTF-8 encoding space
4854   // and (2) we stay in the same n-byte sequence family.
4855   //
4856   // The flipped bits are highlighted in angle brackets in comments,
4857   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4858   // the most significant bit to 1 to leave UTF-8 encoding space.
4859   struct test_case cases[] = {
4860       // 1-byte UTF-8: [0xxx xxxx]
4861       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4862       {false, false, "\xBA"}, // [<1>011 1010]
4863       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4864       {false, false, "\xB9"}, // [<1>011 1001]
4865 
4866       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4867       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4868                                   // Arabic small waw U+06E5
4869       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4870       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4871       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4872       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4873                                   // combining char U+0301
4874       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4875       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4876       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4877 
4878       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4879       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4880                                       // Devanagari Letter A U+0905
4881       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4882       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4883       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4884       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4885       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4886       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4887                                       // combining char U+0901
4888       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4889       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4890       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4891       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4892       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4893   };
4894   const bool atNameStart[] = {true, false};
4895 
4896   size_t i = 0;
4897   char doc[1024];
4898   size_t failCount = 0;
4899 
4900   // we need all the bytes to be parsed, but we don't want the errors that can
4901   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4902   if (g_reparseDeferralEnabledDefault) {
4903     return;
4904   }
4905 
4906   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4907     size_t j = 0;
4908     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4909       const bool expectedSuccess
4910           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4911       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4912                cases[i].tagName);
4913       XML_Parser parser = XML_ParserCreate(NULL);
4914 
4915       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4916           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4917 
4918       bool success = true;
4919       if ((status == XML_STATUS_OK) != expectedSuccess) {
4920         success = false;
4921       }
4922       if ((status == XML_STATUS_ERROR)
4923           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4924         success = false;
4925       }
4926 
4927       if (! success) {
4928         fprintf(
4929             stderr,
4930             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4931             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4932             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4933         failCount++;
4934       }
4935 
4936       XML_ParserFree(parser);
4937     }
4938   }
4939 
4940   if (failCount > 0) {
4941     fail("UTF-8 regression detected");
4942   }
4943 }
4944 END_TEST
4945 
4946 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4947 START_TEST(test_trailing_spaces_in_elements) {
4948   const char *text = "<doc   >Hi</doc >";
4949   const XML_Char *expected = XCS("doc/doc");
4950   CharData storage;
4951 
4952   CharData_Init(&storage);
4953   XML_SetElementHandler(g_parser, record_element_start_handler,
4954                         record_element_end_handler);
4955   XML_SetUserData(g_parser, &storage);
4956   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4957       == XML_STATUS_ERROR)
4958     xml_failure(g_parser);
4959   CharData_CheckXMLChars(&storage, expected);
4960 }
4961 END_TEST
4962 
START_TEST(test_utf16_attribute)4963 START_TEST(test_utf16_attribute) {
4964   const char text[] =
4965       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4966        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4967        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4968        */
4969       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4970   const XML_Char *expected = XCS("a");
4971   CharData storage;
4972 
4973   CharData_Init(&storage);
4974   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4975   XML_SetUserData(g_parser, &storage);
4976   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4977       == XML_STATUS_ERROR)
4978     xml_failure(g_parser);
4979   CharData_CheckXMLChars(&storage, expected);
4980 }
4981 END_TEST
4982 
START_TEST(test_utf16_second_attr)4983 START_TEST(test_utf16_second_attr) {
4984   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4985    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4986    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4987    */
4988   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4989                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4990   const XML_Char *expected = XCS("1");
4991   CharData storage;
4992 
4993   CharData_Init(&storage);
4994   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4995   XML_SetUserData(g_parser, &storage);
4996   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4997       == XML_STATUS_ERROR)
4998     xml_failure(g_parser);
4999   CharData_CheckXMLChars(&storage, expected);
5000 }
5001 END_TEST
5002 
START_TEST(test_attr_after_solidus)5003 START_TEST(test_attr_after_solidus) {
5004   const char *text = "<doc attr1='a' / attr2='b'>";
5005 
5006   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
5007 }
5008 END_TEST
5009 
START_TEST(test_utf16_pe)5010 START_TEST(test_utf16_pe) {
5011   /* <!DOCTYPE doc [
5012    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
5013    * %{KHO KHWAI}{CHO CHAN};
5014    * ]>
5015    * <doc></doc>
5016    *
5017    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5018    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5019    */
5020   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
5021                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
5022                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
5023                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
5024                       "\0%\x0e\x04\x0e\x08\0;\0\n"
5025                       "\0]\0>\0\n"
5026                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
5027 #ifdef XML_UNICODE
5028   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
5029 #else
5030   const XML_Char *expected
5031       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
5032 #endif
5033   CharData storage;
5034 
5035   CharData_Init(&storage);
5036   XML_SetUserData(g_parser, &storage);
5037   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
5038   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5039       == XML_STATUS_ERROR)
5040     xml_failure(g_parser);
5041   CharData_CheckXMLChars(&storage, expected);
5042 }
5043 END_TEST
5044 
5045 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)5046 START_TEST(test_bad_attr_desc_keyword) {
5047   const char *text = "<!DOCTYPE doc [\n"
5048                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
5049                      "]>\n"
5050                      "<doc />";
5051 
5052   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5053                  "Bad keyword !IMPLIED not faulted");
5054 }
5055 END_TEST
5056 
5057 /* Test that an invalid attribute description keyword consisting of
5058  * UTF-16 characters with their top bytes non-zero are correctly
5059  * faulted
5060  */
START_TEST(test_bad_attr_desc_keyword_utf16)5061 START_TEST(test_bad_attr_desc_keyword_utf16) {
5062   /* <!DOCTYPE d [
5063    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
5064    * ]><d/>
5065    *
5066    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5067    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5068    */
5069   const char text[]
5070       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5071         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
5072         "\0#\x0e\x04\x0e\x08\0>\0\n"
5073         "\0]\0>\0<\0d\0/\0>";
5074 
5075   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5076       != XML_STATUS_ERROR)
5077     fail("Invalid UTF16 attribute keyword not faulted");
5078   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5079     xml_failure(g_parser);
5080 }
5081 END_TEST
5082 
5083 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
5084  * using prefix-encoding (see above) to trigger specific code paths
5085  */
START_TEST(test_bad_doctype)5086 START_TEST(test_bad_doctype) {
5087   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
5088                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
5089 
5090   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5091   expect_failure(text, XML_ERROR_SYNTAX,
5092                  "Invalid bytes in DOCTYPE not faulted");
5093 }
5094 END_TEST
5095 
START_TEST(test_bad_doctype_utf8)5096 START_TEST(test_bad_doctype_utf8) {
5097   const char *text = "<!DOCTYPE \xDB\x25"
5098                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
5099   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5100                  "Invalid UTF-8 in DOCTYPE not faulted");
5101 }
5102 END_TEST
5103 
START_TEST(test_bad_doctype_utf16)5104 START_TEST(test_bad_doctype_utf16) {
5105   const char text[] =
5106       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
5107        *
5108        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
5109        * (name character) but not a valid letter (name start character)
5110        */
5111       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
5112       "\x06\xf2"
5113       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
5114 
5115   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5116       != XML_STATUS_ERROR)
5117     fail("Invalid bytes in DOCTYPE not faulted");
5118   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5119     xml_failure(g_parser);
5120 }
5121 END_TEST
5122 
START_TEST(test_bad_doctype_plus)5123 START_TEST(test_bad_doctype_plus) {
5124   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
5125                      "<1+>&foo;</1+>";
5126 
5127   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5128                  "'+' in document name not faulted");
5129 }
5130 END_TEST
5131 
START_TEST(test_bad_doctype_star)5132 START_TEST(test_bad_doctype_star) {
5133   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
5134                      "<1*>&foo;</1*>";
5135 
5136   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5137                  "'*' in document name not faulted");
5138 }
5139 END_TEST
5140 
START_TEST(test_bad_doctype_query)5141 START_TEST(test_bad_doctype_query) {
5142   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5143                      "<1?>&foo;</1?>";
5144 
5145   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5146                  "'?' in document name not faulted");
5147 }
5148 END_TEST
5149 
START_TEST(test_unknown_encoding_bad_ignore)5150 START_TEST(test_unknown_encoding_bad_ignore) {
5151   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5152                      "<!DOCTYPE doc SYSTEM 'foo'>"
5153                      "<doc><e>&entity;</e></doc>";
5154   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5155                      "Invalid character not faulted", XCS("prefix-conv"),
5156                      XML_ERROR_INVALID_TOKEN};
5157 
5158   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5159   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5160   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5161   XML_SetUserData(g_parser, &fault);
5162   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5163                  "Bad IGNORE section with unknown encoding not failed");
5164 }
5165 END_TEST
5166 
START_TEST(test_entity_in_utf16_be_attr)5167 START_TEST(test_entity_in_utf16_be_attr) {
5168   const char text[] =
5169       /* <e a='&#228; &#x00E4;'></e> */
5170       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5171       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5172 #ifdef XML_UNICODE
5173   const XML_Char *expected = XCS("\x00e4 \x00e4");
5174 #else
5175   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5176 #endif
5177   CharData storage;
5178 
5179   CharData_Init(&storage);
5180   XML_SetUserData(g_parser, &storage);
5181   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5182   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5183       == XML_STATUS_ERROR)
5184     xml_failure(g_parser);
5185   CharData_CheckXMLChars(&storage, expected);
5186 }
5187 END_TEST
5188 
START_TEST(test_entity_in_utf16_le_attr)5189 START_TEST(test_entity_in_utf16_le_attr) {
5190   const char text[] =
5191       /* <e a='&#228; &#x00E4;'></e> */
5192       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5193       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5194 #ifdef XML_UNICODE
5195   const XML_Char *expected = XCS("\x00e4 \x00e4");
5196 #else
5197   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5198 #endif
5199   CharData storage;
5200 
5201   CharData_Init(&storage);
5202   XML_SetUserData(g_parser, &storage);
5203   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5204   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5205       == XML_STATUS_ERROR)
5206     xml_failure(g_parser);
5207   CharData_CheckXMLChars(&storage, expected);
5208 }
5209 END_TEST
5210 
START_TEST(test_entity_public_utf16_be)5211 START_TEST(test_entity_public_utf16_be) {
5212   const char text[] =
5213       /* <!DOCTYPE d [ */
5214       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5215       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5216       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5217       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5218       /* %e; */
5219       "\0%\0e\0;\0\n"
5220       /* ]> */
5221       "\0]\0>\0\n"
5222       /* <d>&j;</d> */
5223       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5224   ExtTest2 test_data
5225       = {/* <!ENTITY j 'baz'> */
5226          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5227   const XML_Char *expected = XCS("baz");
5228   CharData storage;
5229 
5230   CharData_Init(&storage);
5231   test_data.storage = &storage;
5232   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5233   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5234   XML_SetUserData(g_parser, &test_data);
5235   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5236   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5237       == XML_STATUS_ERROR)
5238     xml_failure(g_parser);
5239   CharData_CheckXMLChars(&storage, expected);
5240 }
5241 END_TEST
5242 
START_TEST(test_entity_public_utf16_le)5243 START_TEST(test_entity_public_utf16_le) {
5244   const char text[] =
5245       /* <!DOCTYPE d [ */
5246       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5247       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5248       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5249       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5250       /* %e; */
5251       "%\0e\0;\0\n\0"
5252       /* ]> */
5253       "]\0>\0\n\0"
5254       /* <d>&j;</d> */
5255       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5256   ExtTest2 test_data
5257       = {/* <!ENTITY j 'baz'> */
5258          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5259   const XML_Char *expected = XCS("baz");
5260   CharData storage;
5261 
5262   CharData_Init(&storage);
5263   test_data.storage = &storage;
5264   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5265   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5266   XML_SetUserData(g_parser, &test_data);
5267   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5268   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5269       == XML_STATUS_ERROR)
5270     xml_failure(g_parser);
5271   CharData_CheckXMLChars(&storage, expected);
5272 }
5273 END_TEST
5274 
5275 /* Test that a doctype with neither an internal nor external subset is
5276  * faulted
5277  */
START_TEST(test_short_doctype)5278 START_TEST(test_short_doctype) {
5279   const char *text = "<!DOCTYPE doc></doc>";
5280   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5281                  "DOCTYPE without subset not rejected");
5282 }
5283 END_TEST
5284 
START_TEST(test_short_doctype_2)5285 START_TEST(test_short_doctype_2) {
5286   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5287   expect_failure(text, XML_ERROR_SYNTAX,
5288                  "DOCTYPE without Public ID not rejected");
5289 }
5290 END_TEST
5291 
START_TEST(test_short_doctype_3)5292 START_TEST(test_short_doctype_3) {
5293   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5294   expect_failure(text, XML_ERROR_SYNTAX,
5295                  "DOCTYPE without System ID not rejected");
5296 }
5297 END_TEST
5298 
START_TEST(test_long_doctype)5299 START_TEST(test_long_doctype) {
5300   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5301   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5302 }
5303 END_TEST
5304 
START_TEST(test_bad_entity)5305 START_TEST(test_bad_entity) {
5306   const char *text = "<!DOCTYPE doc [\n"
5307                      "  <!ENTITY foo PUBLIC>\n"
5308                      "]>\n"
5309                      "<doc/>";
5310   expect_failure(text, XML_ERROR_SYNTAX,
5311                  "ENTITY without Public ID is not rejected");
5312 }
5313 END_TEST
5314 
5315 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5316 START_TEST(test_bad_entity_2) {
5317   const char *text = "<!DOCTYPE doc [\n"
5318                      "  <!ENTITY % foo bar>\n"
5319                      "]>\n"
5320                      "<doc/>";
5321   expect_failure(text, XML_ERROR_SYNTAX,
5322                  "ENTITY without Public ID is not rejected");
5323 }
5324 END_TEST
5325 
START_TEST(test_bad_entity_3)5326 START_TEST(test_bad_entity_3) {
5327   const char *text = "<!DOCTYPE doc [\n"
5328                      "  <!ENTITY % foo PUBLIC>\n"
5329                      "]>\n"
5330                      "<doc/>";
5331   expect_failure(text, XML_ERROR_SYNTAX,
5332                  "Parameter ENTITY without Public ID is not rejected");
5333 }
5334 END_TEST
5335 
START_TEST(test_bad_entity_4)5336 START_TEST(test_bad_entity_4) {
5337   const char *text = "<!DOCTYPE doc [\n"
5338                      "  <!ENTITY % foo SYSTEM>\n"
5339                      "]>\n"
5340                      "<doc/>";
5341   expect_failure(text, XML_ERROR_SYNTAX,
5342                  "Parameter ENTITY without Public ID is not rejected");
5343 }
5344 END_TEST
5345 
START_TEST(test_bad_notation)5346 START_TEST(test_bad_notation) {
5347   const char *text = "<!DOCTYPE doc [\n"
5348                      "  <!NOTATION n SYSTEM>\n"
5349                      "]>\n"
5350                      "<doc/>";
5351   expect_failure(text, XML_ERROR_SYNTAX,
5352                  "Notation without System ID is not rejected");
5353 }
5354 END_TEST
5355 
5356 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5357 START_TEST(test_default_doctype_handler) {
5358   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5359                      "  <!ENTITY foo 'bar'>\n"
5360                      "]>\n"
5361                      "<doc>&foo;</doc>";
5362   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5363                               {XCS("'test.dtd'"), 10, XML_FALSE},
5364                               {NULL, 0, XML_FALSE}};
5365   int i;
5366 
5367   XML_SetUserData(g_parser, &test_data);
5368   XML_SetDefaultHandler(g_parser, checking_default_handler);
5369   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5370   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5371       == XML_STATUS_ERROR)
5372     xml_failure(g_parser);
5373   for (i = 0; test_data[i].expected != NULL; i++)
5374     if (! test_data[i].seen)
5375       fail("Default handler not run for public !DOCTYPE");
5376 }
5377 END_TEST
5378 
START_TEST(test_empty_element_abort)5379 START_TEST(test_empty_element_abort) {
5380   const char *text = "<abort/>";
5381 
5382   XML_SetStartElementHandler(g_parser, start_element_suspender);
5383   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5384       != XML_STATUS_ERROR)
5385     fail("Expected to error on abort");
5386 }
5387 END_TEST
5388 
5389 /* Regression test for GH issue #612: unfinished m_declAttributeType
5390  * allocation in ->m_tempPool can corrupt following allocation.
5391  */
START_TEST(test_pool_integrity_with_unfinished_attr)5392 START_TEST(test_pool_integrity_with_unfinished_attr) {
5393   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5394                      "<!DOCTYPE foo [\n"
5395                      "<!ELEMENT foo ANY>\n"
5396                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5397                      "%entp;\n"
5398                      "]>\n"
5399                      "<a></a>\n";
5400   const XML_Char *expected = XCS("COMMENT");
5401   CharData storage;
5402 
5403   CharData_Init(&storage);
5404   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5405   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5406   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5407   XML_SetCommentHandler(g_parser, accumulate_comment);
5408   XML_SetUserData(g_parser, &storage);
5409   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5410       == XML_STATUS_ERROR)
5411     xml_failure(g_parser);
5412   CharData_CheckXMLChars(&storage, expected);
5413 }
5414 END_TEST
5415 
5416 /* Test a possible early return location in internalEntityProcessor */
START_TEST(test_entity_ref_no_elements)5417 START_TEST(test_entity_ref_no_elements) {
5418   const char *const text = "<!DOCTYPE foo [\n"
5419                            "<!ENTITY e1 \"test\">\n"
5420                            "]> <foo>&e1;"; // intentionally missing newline
5421 
5422   XML_Parser parser = XML_ParserCreate(NULL);
5423   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5424               == XML_STATUS_ERROR);
5425   assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
5426   XML_ParserFree(parser);
5427 }
5428 END_TEST
5429 
5430 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5431 START_TEST(test_deep_nested_entity) {
5432   const size_t N_LINES = 60000;
5433   const size_t SIZE_PER_LINE = 50;
5434 
5435   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5436   if (text == NULL) {
5437     fail("malloc failed");
5438   }
5439 
5440   char *textPtr = text;
5441 
5442   // Create the XML
5443   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5444                       "<!DOCTYPE foo [\n"
5445                       "	<!ENTITY s0 'deepText'>\n");
5446 
5447   for (size_t i = 1; i < N_LINES; ++i) {
5448     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5449                         (long unsigned)i, (long unsigned)(i - 1));
5450   }
5451 
5452   snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5453            (long unsigned)(N_LINES - 1));
5454 
5455   const XML_Char *const expected = XCS("deepText");
5456 
5457   CharData storage;
5458   CharData_Init(&storage);
5459 
5460   XML_Parser parser = XML_ParserCreate(NULL);
5461 
5462   XML_SetCharacterDataHandler(parser, accumulate_characters);
5463   XML_SetUserData(parser, &storage);
5464 
5465   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5466       == XML_STATUS_ERROR)
5467     xml_failure(parser);
5468 
5469   CharData_CheckXMLChars(&storage, expected);
5470   XML_ParserFree(parser);
5471   free(text);
5472 }
5473 END_TEST
5474 
5475 /* Tests if chained entity references in attributes
5476 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5477 START_TEST(test_deep_nested_attribute_entity) {
5478   const size_t N_LINES = 60000;
5479   const size_t SIZE_PER_LINE = 100;
5480 
5481   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5482   if (text == NULL) {
5483     fail("malloc failed");
5484   }
5485 
5486   char *textPtr = text;
5487 
5488   // Create the XML
5489   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5490                       "<!DOCTYPE foo [\n"
5491                       "	<!ENTITY s0 'deepText'>\n");
5492 
5493   for (size_t i = 1; i < N_LINES; ++i) {
5494     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5495                         (long unsigned)i, (long unsigned)(i - 1));
5496   }
5497 
5498   snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5499            (long unsigned)(N_LINES - 1));
5500 
5501   AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5502   ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
5503   info[0].attributes = doc_info;
5504 
5505   XML_Parser parser = XML_ParserCreate(NULL);
5506   ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5507 
5508   XML_SetStartElementHandler(parser, counting_start_element_handler);
5509   XML_SetUserData(parser, &parserPlusElemenInfo);
5510 
5511   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5512       == XML_STATUS_ERROR)
5513     xml_failure(parser);
5514 
5515   XML_ParserFree(parser);
5516   free(text);
5517 }
5518 END_TEST
5519 
START_TEST(test_deep_nested_entity_delayed_interpretation)5520 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5521   const size_t N_LINES = 70000;
5522   const size_t SIZE_PER_LINE = 100;
5523 
5524   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5525   if (text == NULL) {
5526     fail("malloc failed");
5527   }
5528 
5529   char *textPtr = text;
5530 
5531   // Create the XML
5532   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5533                       "<!DOCTYPE foo [\n"
5534                       "	<!ENTITY %% s0 'deepText'>\n");
5535 
5536   for (size_t i = 1; i < N_LINES; ++i) {
5537     textPtr += snprintf(textPtr, SIZE_PER_LINE,
5538                         "  <!ENTITY %% s%lu '&#37;s%lu;'>\n", (long unsigned)i,
5539                         (long unsigned)(i - 1));
5540   }
5541 
5542   snprintf(textPtr, SIZE_PER_LINE,
5543            "  <!ENTITY %% define_g \"<!ENTITY g '&#37;s%lu;'>\">\n"
5544            "  %%define_g;\n"
5545            "]>\n"
5546            "<foo/>\n",
5547            (long unsigned)(N_LINES - 1));
5548 
5549   XML_Parser parser = XML_ParserCreate(NULL);
5550 
5551   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5552   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5553       == XML_STATUS_ERROR)
5554     xml_failure(parser);
5555 
5556   XML_ParserFree(parser);
5557   free(text);
5558 }
5559 END_TEST
5560 
START_TEST(test_nested_entity_suspend)5561 START_TEST(test_nested_entity_suspend) {
5562   const char *const text = "<!DOCTYPE a [\n"
5563                            "  <!ENTITY e1 '<!--e1-->'>\n"
5564                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5565                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5566                            "]>\n"
5567                            "<a><!--start-->&e3;<!--end--></a>";
5568   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5569       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5570   CharData storage;
5571   CharData_Init(&storage);
5572   XML_Parser parser = XML_ParserCreate(NULL);
5573   ParserPlusStorage parserPlusStorage = {parser, &storage};
5574 
5575   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5576   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5577   XML_SetUserData(parser, &parserPlusStorage);
5578 
5579   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5580   while (status == XML_STATUS_SUSPENDED) {
5581     status = XML_ResumeParser(parser);
5582   }
5583   if (status != XML_STATUS_OK)
5584     xml_failure(parser);
5585 
5586   CharData_CheckXMLChars(&storage, expected);
5587   XML_ParserFree(parser);
5588 }
5589 END_TEST
5590 
START_TEST(test_nested_entity_suspend_2)5591 START_TEST(test_nested_entity_suspend_2) {
5592   const char *const text = "<!DOCTYPE doc [\n"
5593                            "  <!ENTITY ge1 'head1Ztail1'>\n"
5594                            "  <!ENTITY ge2 'head2&ge1;tail2'>\n"
5595                            "  <!ENTITY ge3 'head3&ge2;tail3'>\n"
5596                            "]>\n"
5597                            "<doc>&ge3;</doc>";
5598   const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5599       XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5600   CharData storage;
5601   CharData_Init(&storage);
5602   XML_Parser parser = XML_ParserCreate(NULL);
5603   ParserPlusStorage parserPlusStorage = {parser, &storage};
5604 
5605   XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5606   XML_SetUserData(parser, &parserPlusStorage);
5607 
5608   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5609   while (status == XML_STATUS_SUSPENDED) {
5610     status = XML_ResumeParser(parser);
5611   }
5612   if (status != XML_STATUS_OK)
5613     xml_failure(parser);
5614 
5615   CharData_CheckXMLChars(&storage, expected);
5616   XML_ParserFree(parser);
5617 }
5618 END_TEST
5619 
5620 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5621 START_TEST(test_big_tokens_scale_linearly) {
5622   const struct {
5623     const char *pre;
5624     const char *post;
5625   } text[] = {
5626       {"<a>", "</a>"},                      // assumed good, used as baseline
5627       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5628       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5629       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5630       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5631   };
5632   const int num_cases = sizeof(text) / sizeof(text[0]);
5633   char aaaaaa[4096];
5634   const int fillsize = (int)sizeof(aaaaaa);
5635   const int fillcount = 100;
5636   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5637   const unsigned max_factor = 4;
5638   const unsigned max_scanned = max_factor * approx_bytes;
5639 
5640   memset(aaaaaa, 'a', fillsize);
5641 
5642   if (! g_reparseDeferralEnabledDefault) {
5643     return; // heuristic is disabled; we would get O(n^2) and fail.
5644   }
5645 
5646   for (int i = 0; i < num_cases; ++i) {
5647     XML_Parser parser = XML_ParserCreate(NULL);
5648     assert_true(parser != NULL);
5649     enum XML_Status status;
5650     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5651 
5652     // parse the start text
5653     g_bytesScanned = 0;
5654     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5655                                      (int)strlen(text[i].pre), XML_FALSE);
5656     if (status != XML_STATUS_OK) {
5657       xml_failure(parser);
5658     }
5659 
5660     // parse lots of 'a', failing the test early if it takes too long
5661     unsigned past_max_count = 0;
5662     for (int f = 0; f < fillcount; ++f) {
5663       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5664       if (status != XML_STATUS_OK) {
5665         xml_failure(parser);
5666       }
5667       if (g_bytesScanned > max_scanned) {
5668         // We're not done, and have already passed the limit -- the test will
5669         // definitely fail. This block allows us to save time by failing early.
5670         const unsigned pushed
5671             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5672         fprintf(
5673             stderr,
5674             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5675             f + 1, fillcount, pushed, g_bytesScanned,
5676             g_bytesScanned / (double)pushed, max_scanned, max_factor);
5677         past_max_count++;
5678         // We are failing, but allow a few log prints first. If we don't reach
5679         // a count of five, the test will fail after the loop instead.
5680         assert_true(past_max_count < 5);
5681       }
5682     }
5683 
5684     // parse the end text
5685     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5686                                      (int)strlen(text[i].post), XML_TRUE);
5687     if (status != XML_STATUS_OK) {
5688       xml_failure(parser);
5689     }
5690 
5691     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5692     if (g_bytesScanned > max_scanned) {
5693       fprintf(
5694           stderr,
5695           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5696           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5697           max_factor);
5698       fail("scanned too many bytes");
5699     }
5700 
5701     XML_ParserFree(parser);
5702   }
5703 }
5704 END_TEST
5705 
START_TEST(test_set_reparse_deferral)5706 START_TEST(test_set_reparse_deferral) {
5707   const char *const pre = "<d>";
5708   const char *const start = "<x attr='";
5709   const char *const end = "'></x>";
5710   char eeeeee[100];
5711   const int fillsize = (int)sizeof(eeeeee);
5712   memset(eeeeee, 'e', fillsize);
5713 
5714   for (int enabled = 0; enabled <= 1; enabled += 1) {
5715     set_subtest("deferral=%d", enabled);
5716 
5717     XML_Parser parser = XML_ParserCreate(NULL);
5718     assert_true(parser != NULL);
5719     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5720     // pre-grow the buffer to avoid reparsing due to almost-fullness
5721     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5722 
5723     CharData storage;
5724     CharData_Init(&storage);
5725     XML_SetUserData(parser, &storage);
5726     XML_SetStartElementHandler(parser, start_element_event_handler);
5727 
5728     enum XML_Status status;
5729     // parse the start text
5730     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5731     if (status != XML_STATUS_OK) {
5732       xml_failure(parser);
5733     }
5734     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5735 
5736     // ..and the start of the token
5737     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5738     if (status != XML_STATUS_OK) {
5739       xml_failure(parser);
5740     }
5741     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5742 
5743     // try to parse lots of 'e', but the token isn't finished
5744     for (int c = 0; c < 100; ++c) {
5745       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5746       if (status != XML_STATUS_OK) {
5747         xml_failure(parser);
5748       }
5749     }
5750     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5751 
5752     // end the <x> token.
5753     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5754     if (status != XML_STATUS_OK) {
5755       xml_failure(parser);
5756     }
5757 
5758     if (enabled) {
5759       // In general, we may need to push more data to trigger a reparse attempt,
5760       // but in this test, the data is constructed to always require it.
5761       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5762       // 2x the token length should suffice; the +1 covers the start and end.
5763       for (int c = 0; c < 101; ++c) {
5764         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5765         if (status != XML_STATUS_OK) {
5766           xml_failure(parser);
5767         }
5768       }
5769     }
5770     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5771 
5772     XML_ParserFree(parser);
5773   }
5774 }
5775 END_TEST
5776 
5777 struct element_decl_data {
5778   XML_Parser parser;
5779   int count;
5780 };
5781 
5782 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5783 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5784   UNUSED_P(name);
5785   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5786   testdata->count += 1;
5787   XML_FreeContentModel(testdata->parser, model);
5788 }
5789 
5790 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5791 external_inherited_parser(XML_Parser p, const XML_Char *context,
5792                           const XML_Char *base, const XML_Char *systemId,
5793                           const XML_Char *publicId) {
5794   UNUSED_P(base);
5795   UNUSED_P(systemId);
5796   UNUSED_P(publicId);
5797   const char *const pre = "<!ELEMENT document ANY>\n";
5798   const char *const start = "<!ELEMENT ";
5799   const char *const end = " ANY>\n";
5800   const char *const post = "<!ELEMENT xyz ANY>\n";
5801   const int enabled = *(int *)XML_GetUserData(p);
5802   char eeeeee[100];
5803   char spaces[100];
5804   const int fillsize = (int)sizeof(eeeeee);
5805   assert_true(fillsize == (int)sizeof(spaces));
5806   memset(eeeeee, 'e', fillsize);
5807   memset(spaces, ' ', fillsize);
5808 
5809   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5810   assert_true(parser != NULL);
5811   // pre-grow the buffer to avoid reparsing due to almost-fullness
5812   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5813 
5814   struct element_decl_data testdata;
5815   testdata.parser = parser;
5816   testdata.count = 0;
5817   XML_SetUserData(parser, &testdata);
5818   XML_SetElementDeclHandler(parser, element_decl_counter);
5819 
5820   enum XML_Status status;
5821   // parse the initial text
5822   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5823   if (status != XML_STATUS_OK) {
5824     xml_failure(parser);
5825   }
5826   assert_true(testdata.count == 1); // first element should be done
5827 
5828   // ..and the start of the big token
5829   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5830   if (status != XML_STATUS_OK) {
5831     xml_failure(parser);
5832   }
5833   assert_true(testdata.count == 1); // still just the first one
5834 
5835   // try to parse lots of 'e', but the token isn't finished
5836   for (int c = 0; c < 100; ++c) {
5837     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5838     if (status != XML_STATUS_OK) {
5839       xml_failure(parser);
5840     }
5841   }
5842   assert_true(testdata.count == 1); // *still* just the first one
5843 
5844   // end the big token.
5845   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5846   if (status != XML_STATUS_OK) {
5847     xml_failure(parser);
5848   }
5849 
5850   if (enabled) {
5851     // In general, we may need to push more data to trigger a reparse attempt,
5852     // but in this test, the data is constructed to always require it.
5853     assert_true(testdata.count == 1); // or the test is incorrect
5854     // 2x the token length should suffice; the +1 covers the start and end.
5855     for (int c = 0; c < 101; ++c) {
5856       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5857       if (status != XML_STATUS_OK) {
5858         xml_failure(parser);
5859       }
5860     }
5861   }
5862   assert_true(testdata.count == 2); // the big token should be done
5863 
5864   // parse the final text
5865   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5866   if (status != XML_STATUS_OK) {
5867     xml_failure(parser);
5868   }
5869   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5870 
5871   XML_ParserFree(parser);
5872   return XML_STATUS_OK;
5873 }
5874 
START_TEST(test_reparse_deferral_is_inherited)5875 START_TEST(test_reparse_deferral_is_inherited) {
5876   const char *const text
5877       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5878   for (int enabled = 0; enabled <= 1; ++enabled) {
5879     set_subtest("deferral=%d", enabled);
5880 
5881     XML_Parser parser = XML_ParserCreate(NULL);
5882     assert_true(parser != NULL);
5883     XML_SetUserData(parser, (void *)&enabled);
5884     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5885     // this handler creates a sub-parser and checks that its deferral behavior
5886     // is what we expected, based on the value of `enabled` (in userdata).
5887     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5888     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5889     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5890       xml_failure(parser);
5891 
5892     XML_ParserFree(parser);
5893   }
5894 }
5895 END_TEST
5896 
START_TEST(test_set_reparse_deferral_on_null_parser)5897 START_TEST(test_set_reparse_deferral_on_null_parser) {
5898   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5899   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5900   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5901   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5902   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5903               == XML_FALSE);
5904   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5905               == XML_FALSE);
5906 }
5907 END_TEST
5908 
START_TEST(test_set_reparse_deferral_on_the_fly)5909 START_TEST(test_set_reparse_deferral_on_the_fly) {
5910   const char *const pre = "<d><x attr='";
5911   const char *const end = "'></x>";
5912   char iiiiii[100];
5913   const int fillsize = (int)sizeof(iiiiii);
5914   memset(iiiiii, 'i', fillsize);
5915 
5916   XML_Parser parser = XML_ParserCreate(NULL);
5917   assert_true(parser != NULL);
5918   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5919 
5920   CharData storage;
5921   CharData_Init(&storage);
5922   XML_SetUserData(parser, &storage);
5923   XML_SetStartElementHandler(parser, start_element_event_handler);
5924 
5925   enum XML_Status status;
5926   // parse the start text
5927   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5928   if (status != XML_STATUS_OK) {
5929     xml_failure(parser);
5930   }
5931   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5932 
5933   // try to parse some 'i', but the token isn't finished
5934   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5935   if (status != XML_STATUS_OK) {
5936     xml_failure(parser);
5937   }
5938   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5939 
5940   // end the <x> token.
5941   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5942   if (status != XML_STATUS_OK) {
5943     xml_failure(parser);
5944   }
5945   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5946 
5947   // now change the heuristic setting and add *no* data
5948   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5949   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5950   status = XML_Parse(parser, "", 0, XML_FALSE);
5951   if (status != XML_STATUS_OK) {
5952     xml_failure(parser);
5953   }
5954   CharData_CheckXMLChars(&storage, XCS("dx"));
5955 
5956   XML_ParserFree(parser);
5957 }
5958 END_TEST
5959 
START_TEST(test_set_bad_reparse_option)5960 START_TEST(test_set_bad_reparse_option) {
5961   XML_Parser parser = XML_ParserCreate(NULL);
5962   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5963   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5964   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5965   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5966   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5967   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5968   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5969   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5970   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5971   XML_ParserFree(parser);
5972 }
5973 END_TEST
5974 
// Allocation statistics maintained by counting_realloc().
static size_t g_totalAlloc = 0;   // sum of every size requested so far
static size_t g_biggestAlloc = 0; // largest single size requested so far

/* realloc() wrapper that records the requested size in g_totalAlloc and
   g_biggestAlloc before forwarding the call.  The size is recorded whether
   or not the underlying realloc() succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  g_totalAlloc += size;
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  return realloc(ptr, size);
}
5986 
/* malloc() counterpart of counting_realloc(): a fresh allocation is treated
   as a reallocation of NULL, so its size is recorded the same way. */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
5991 
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5992 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5993   if (g_chunkSize != 0) {
5994     // this test does not use SINGLE_BYTES, because it depends on very precise
5995     // buffer fills.
5996     return;
5997   }
5998   if (! g_reparseDeferralEnabledDefault) {
5999     return; // this test is irrelevant when the deferral heuristic is disabled.
6000   }
6001 
6002   const int document_length = 65536;
6003   char *const document = (char *)malloc(document_length);
6004 
6005   const XML_Memory_Handling_Suite memfuncs = {
6006       counting_malloc,
6007       counting_realloc,
6008       free,
6009   };
6010 
6011   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
6012   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
6013   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
6014 
6015   for (const int *leading = leading_list; *leading >= 0; leading++) {
6016     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
6017       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
6018         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
6019                     *fillsize);
6020         // start by checking that the test looks reasonably valid
6021         assert_true(*leading + *bigtoken <= document_length);
6022 
6023         // put 'x' everywhere; some will be overwritten by elements.
6024         memset(document, 'x', document_length);
6025         // maybe add an initial tag
6026         if (*leading) {
6027           assert_true(*leading >= 3); // or the test case is invalid
6028           memcpy(document, "<a>", 3);
6029         }
6030         // add the large token
6031         document[*leading + 0] = '<';
6032         document[*leading + 1] = 'b';
6033         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
6034         document[*leading + *bigtoken - 1] = '>';
6035 
6036         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
6037         const int expected_elem_total = 1 + (*leading ? 1 : 0);
6038 
6039         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
6040         assert_true(parser != NULL);
6041 
6042         CharData storage;
6043         CharData_Init(&storage);
6044         XML_SetUserData(parser, &storage);
6045         XML_SetStartElementHandler(parser, start_element_event_handler);
6046 
6047         g_biggestAlloc = 0;
6048         g_totalAlloc = 0;
6049         int offset = 0;
6050         // fill data until the big token is covered (but not necessarily parsed)
6051         while (offset < *leading + *bigtoken) {
6052           assert_true(offset + *fillsize <= document_length);
6053           const enum XML_Status status
6054               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6055           if (status != XML_STATUS_OK) {
6056             xml_failure(parser);
6057           }
6058           offset += *fillsize;
6059         }
6060         // Now, check that we've had a buffer allocation that could fit the
6061         // context bytes and our big token. In order to detect a special case,
6062         // we need to know how many bytes of our big token were included in the
6063         // first push that contained _any_ bytes of the big token:
6064         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
6065         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
6066           // Special case: we aren't saving any context, and the whole big token
6067           // was covered by a single fill, so Expat may have parsed directly
6068           // from our input pointer, without allocating an internal buffer.
6069         } else if (*leading < XML_CONTEXT_BYTES) {
6070           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
6071         } else {
6072           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
6073         }
6074         // fill data until the big token is actually parsed
6075         while (storage.count < expected_elem_total) {
6076           const size_t alloc_before = g_totalAlloc;
6077           assert_true(offset + *fillsize <= document_length);
6078           const enum XML_Status status
6079               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6080           if (status != XML_STATUS_OK) {
6081             xml_failure(parser);
6082           }
6083           offset += *fillsize;
6084           // since all the bytes of the big token are already in the buffer,
6085           // the bufsize ceiling should make us finish its parsing without any
6086           // further buffer allocations. We assume that there will be no other
6087           // large allocations in this test.
6088           assert_true(g_totalAlloc - alloc_before < 4096);
6089         }
6090         // test-the-test: was our alloc even called?
6091         assert_true(g_totalAlloc > 0);
6092         // test-the-test: there shouldn't be any extra start elements
6093         assert_true(storage.count == expected_elem_total);
6094 
6095         XML_ParserFree(parser);
6096       }
6097     }
6098   }
6099   free(document);
6100 }
6101 END_TEST
6102 
START_TEST(test_varying_buffer_fills)6103 START_TEST(test_varying_buffer_fills) {
6104   const int KiB = 1024;
6105   const int MiB = 1024 * KiB;
6106   const int document_length = 16 * MiB;
6107   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
6108 
6109   if (g_chunkSize != 0) {
6110     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
6111   }
6112 
6113   char *const document = (char *)malloc(document_length);
6114   assert_true(document != NULL);
6115   memset(document, 'x', document_length);
6116   document[0] = '<';
6117   document[1] = 't';
6118   memset(&document[2], ' ', big - 2); // a very spacy token
6119   document[big - 1] = '>';
6120 
6121   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
6122   // When reparse deferral is enabled, the final (negated) value is the expected
6123   // maximum number of bytes scanned in parse attempts.
6124   const int testcases[][30] = {
6125       {8 * MiB, -8 * MiB},
6126       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
6127       // zero-size fills shouldn't trigger the bypass
6128       {4 * MiB, 0, 4 * MiB, -12 * MiB},
6129       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
6130       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
6131       // try to hit the buffer ceiling only once (at the end)
6132       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
6133       // try to hit the same buffer ceiling multiple times
6134       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
6135 
6136       // try to hit every ceiling, by always landing 1K shy of the buffer size
6137       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
6138        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
6139 
6140       // try to avoid every ceiling, by always landing 1B past the buffer size
6141       // the normal 2x heuristic threshold still forces parse attempts.
6142       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
6143        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
6144        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
6145        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
6146        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6147        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6148        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6149        -(10 * MiB + 682 * KiB + 7)},
6150       // try to avoid every ceiling again, except on our last fill.
6151       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
6152        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
6153        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
6154        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
6155        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6156        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6157        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6158        -(10 * MiB + 682 * KiB + 6)},
6159 
6160       // try to hit ceilings on the way multiple times
6161       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6162        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6163        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
6164        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
6165        // we'll make a parse attempt at every parse call
6166        -(45 * MiB + 12)},
6167   };
6168   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6169   for (int test_i = 0; test_i < testcount; test_i++) {
6170     const int *fillsize = testcases[test_i];
6171     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6172                 fillsize[2], fillsize[3]);
6173     XML_Parser parser = XML_ParserCreate(NULL);
6174     assert_true(parser != NULL);
6175 
6176     CharData storage;
6177     CharData_Init(&storage);
6178     XML_SetUserData(parser, &storage);
6179     XML_SetStartElementHandler(parser, start_element_event_handler);
6180 
6181     g_bytesScanned = 0;
6182     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6183     int offset = 0;
6184     while (*fillsize >= 0) {
6185       assert_true(offset + *fillsize <= document_length); // or test is invalid
6186       const enum XML_Status status
6187           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6188       if (status != XML_STATUS_OK) {
6189         xml_failure(parser);
6190       }
6191       offset += *fillsize;
6192       fillsize++;
6193       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6194       worstcase_bytes += offset; // we might've tried to parse all pending bytes
6195     }
6196     assert_true(storage.count == 1); // the big token should've been parsed
6197     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6198     if (g_reparseDeferralEnabledDefault) {
6199       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6200       const unsigned max_bytes_scanned = -*fillsize;
6201       if (g_bytesScanned > max_bytes_scanned) {
6202         fprintf(stderr,
6203                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6204                 g_bytesScanned, max_bytes_scanned);
6205         fail("too many bytes scanned in parse attempts");
6206       }
6207     }
6208     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6209 
6210     XML_ParserFree(parser);
6211   }
6212   free(document);
6213 }
6214 END_TEST
6215 
6216 void
make_basic_test_case(Suite * s)6217 make_basic_test_case(Suite *s) {
6218   TCase *tc_basic = tcase_create("basic tests");
6219 
6220   suite_add_tcase(s, tc_basic);
6221   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
6222 
6223   tcase_add_test(tc_basic, test_nul_byte);
6224   tcase_add_test(tc_basic, test_u0000_char);
6225   tcase_add_test(tc_basic, test_siphash_self);
6226   tcase_add_test(tc_basic, test_siphash_spec);
6227   tcase_add_test(tc_basic, test_bom_utf8);
6228   tcase_add_test(tc_basic, test_bom_utf16_be);
6229   tcase_add_test(tc_basic, test_bom_utf16_le);
6230   tcase_add_test(tc_basic, test_nobom_utf16_le);
6231   tcase_add_test(tc_basic, test_hash_collision);
6232   tcase_add_test(tc_basic, test_illegal_utf8);
6233   tcase_add_test(tc_basic, test_utf8_auto_align);
6234   tcase_add_test(tc_basic, test_utf16);
6235   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
6236   tcase_add_test(tc_basic, test_not_utf16);
6237   tcase_add_test(tc_basic, test_bad_encoding);
6238   tcase_add_test(tc_basic, test_latin1_umlauts);
6239   tcase_add_test(tc_basic, test_long_utf8_character);
6240   tcase_add_test(tc_basic, test_long_latin1_attribute);
6241   tcase_add_test(tc_basic, test_long_ascii_attribute);
6242   /* Regression test for SF bug #491986. */
6243   tcase_add_test(tc_basic, test_danish_latin1);
6244   /* Regression test for SF bug #514281. */
6245   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
6246   tcase_add_test(tc_basic, test_french_charref_decimal);
6247   tcase_add_test(tc_basic, test_french_latin1);
6248   tcase_add_test(tc_basic, test_french_utf8);
6249   tcase_add_test(tc_basic, test_utf8_false_rejection);
6250   tcase_add_test(tc_basic, test_line_number_after_parse);
6251   tcase_add_test(tc_basic, test_column_number_after_parse);
6252   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
6253   tcase_add_test(tc_basic, test_line_number_after_error);
6254   tcase_add_test(tc_basic, test_column_number_after_error);
6255   tcase_add_test(tc_basic, test_really_long_lines);
6256   tcase_add_test(tc_basic, test_really_long_encoded_lines);
6257   tcase_add_test(tc_basic, test_end_element_events);
6258   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
6259   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
6260   tcase_add_test(tc_basic, test_xmldecl_misplaced);
6261   tcase_add_test(tc_basic, test_xmldecl_invalid);
6262   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
6263   tcase_add_test(tc_basic, test_xmldecl_missing_value);
6264   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
6265   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
6266   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
6267   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
6268   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
6269   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
6270   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
6271   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
6272   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
6273   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
6274   tcase_add_test(tc_basic,
6275                  test_wfc_undeclared_entity_with_external_subset_standalone);
6276   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
6277   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
6278   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
6279   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
6280   tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
6281   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
6282   tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
6283   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
6284   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
6285   tcase_add_test(tc_basic, test_dtd_attr_handling);
6286   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
6287   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
6288   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
6289   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
6290   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
6291   tcase_add_test(tc_basic, test_good_cdata_ascii);
6292   tcase_add_test(tc_basic, test_good_cdata_utf16);
6293   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
6294   tcase_add_test(tc_basic, test_long_cdata_utf16);
6295   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
6296   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
6297   tcase_add_test(tc_basic, test_bad_cdata);
6298   tcase_add_test(tc_basic, test_bad_cdata_utf16);
6299   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
6300   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
6301   tcase_add_test(tc_basic, test_memory_allocation);
6302   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
6303   tcase_add_test(tc_basic, test_dtd_elements);
6304   tcase_add_test(tc_basic, test_dtd_elements_nesting);
6305   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
6306   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
6307   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
6308   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
6309   tcase_add_test__ifdef_xml_dtd(tc_basic,
6310                                 test_foreign_dtd_without_external_subset);
6311   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6312   tcase_add_test(tc_basic, test_set_base);
6313   tcase_add_test(tc_basic, test_attributes);
6314   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6315   tcase_add_test(tc_basic, test_resume_invalid_parse);
6316   tcase_add_test(tc_basic, test_resume_resuspended);
6317   tcase_add_test(tc_basic, test_cdata_default);
6318   tcase_add_test(tc_basic, test_subordinate_reset);
6319   tcase_add_test(tc_basic, test_subordinate_suspend);
6320   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6321   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6322   tcase_add_test__ifdef_xml_dtd(tc_basic,
6323                                 test_ext_entity_invalid_suspended_parse);
6324   tcase_add_test(tc_basic, test_explicit_encoding);
6325   tcase_add_test(tc_basic, test_trailing_cr);
6326   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6327   tcase_add_test(tc_basic, test_trailing_rsqb);
6328   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6329   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6330   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6331   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6332   tcase_add_test(tc_basic, test_empty_parse);
6333   tcase_add_test(tc_basic, test_negative_len_parse);
6334   tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6335   tcase_add_test(tc_basic, test_get_buffer_1);
6336   tcase_add_test(tc_basic, test_get_buffer_2);
6337 #if XML_CONTEXT_BYTES > 0
6338   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6339 #endif
6340   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6341   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6342   tcase_add_test(tc_basic, test_byte_info_at_end);
6343   tcase_add_test(tc_basic, test_byte_info_at_error);
6344   tcase_add_test(tc_basic, test_byte_info_at_cdata);
6345   tcase_add_test(tc_basic, test_predefined_entities);
6346   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6347   tcase_add_test(tc_basic, test_not_predefined_entities);
6348   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6349   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6350   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6351   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6352   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6353   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6354   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6355   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6356   tcase_add_test(tc_basic, test_bad_public_doctype);
6357   tcase_add_test(tc_basic, test_attribute_enum_value);
6358   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6359   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6360   tcase_add_test(tc_basic, test_public_notation_no_sysid);
6361   tcase_add_test(tc_basic, test_nested_groups);
6362   tcase_add_test(tc_basic, test_group_choice);
6363   tcase_add_test(tc_basic, test_standalone_parameter_entity);
6364   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6365   tcase_add_test__ifdef_xml_dtd(tc_basic,
6366                                 test_recursive_external_parameter_entity);
6367   tcase_add_test__ifdef_xml_dtd(tc_basic,
6368                                 test_recursive_external_parameter_entity_2);
6369   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6370   tcase_add_test(tc_basic, test_suspend_xdecl);
6371   tcase_add_test(tc_basic, test_abort_epilog);
6372   tcase_add_test(tc_basic, test_abort_epilog_2);
6373   tcase_add_test(tc_basic, test_suspend_epilog);
6374   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6375   tcase_add_test(tc_basic, test_unfinished_epilog);
6376   tcase_add_test(tc_basic, test_partial_char_in_epilog);
6377   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6378   tcase_add_test__ifdef_xml_dtd(tc_basic,
6379                                 test_suspend_resume_internal_entity_issue_629);
6380   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6381   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6382   tcase_add_test(tc_basic, test_restart_on_error);
6383   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6384   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6385   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6386   tcase_add_test(tc_basic, test_standalone_internal_entity);
6387   tcase_add_test(tc_basic, test_skipped_external_entity);
6388   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6389   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6390   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6391   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6392   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6393   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6394   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6395   tcase_add_test(tc_basic, test_pi_handled_in_default);
6396   tcase_add_test(tc_basic, test_comment_handled_in_default);
6397   tcase_add_test(tc_basic, test_pi_yml);
6398   tcase_add_test(tc_basic, test_pi_xnl);
6399   tcase_add_test(tc_basic, test_pi_xmm);
6400   tcase_add_test(tc_basic, test_utf16_pi);
6401   tcase_add_test(tc_basic, test_utf16_be_pi);
6402   tcase_add_test(tc_basic, test_utf16_be_comment);
6403   tcase_add_test(tc_basic, test_utf16_le_comment);
6404   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6405   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6406   tcase_add_test(tc_basic, test_unknown_encoding_success);
6407   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6408   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6409   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6410   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6411   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6412   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6413   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6414   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6415   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6416   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6417   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6418   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6419   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6420   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6421   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6422   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6423   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6424   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6425   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6426   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6427   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6428   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6429   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6430   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6431   tcase_add_test(tc_basic, test_utf16_attribute);
6432   tcase_add_test(tc_basic, test_utf16_second_attr);
6433   tcase_add_test(tc_basic, test_attr_after_solidus);
6434   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6435   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6436   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6437   tcase_add_test(tc_basic, test_bad_doctype);
6438   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6439   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6440   tcase_add_test(tc_basic, test_bad_doctype_plus);
6441   tcase_add_test(tc_basic, test_bad_doctype_star);
6442   tcase_add_test(tc_basic, test_bad_doctype_query);
6443   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6444   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6445   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6446   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6447   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6448   tcase_add_test(tc_basic, test_short_doctype);
6449   tcase_add_test(tc_basic, test_short_doctype_2);
6450   tcase_add_test(tc_basic, test_short_doctype_3);
6451   tcase_add_test(tc_basic, test_long_doctype);
6452   tcase_add_test(tc_basic, test_bad_entity);
6453   tcase_add_test(tc_basic, test_bad_entity_2);
6454   tcase_add_test(tc_basic, test_bad_entity_3);
6455   tcase_add_test(tc_basic, test_bad_entity_4);
6456   tcase_add_test(tc_basic, test_bad_notation);
6457   tcase_add_test(tc_basic, test_default_doctype_handler);
6458   tcase_add_test(tc_basic, test_empty_element_abort);
6459   tcase_add_test__ifdef_xml_dtd(tc_basic,
6460                                 test_pool_integrity_with_unfinished_attr);
6461   tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
6462   tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
6463   tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
6464   tcase_add_test__if_xml_ge(tc_basic,
6465                             test_deep_nested_entity_delayed_interpretation);
6466   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6467   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
6468   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6469   tcase_add_test(tc_basic, test_set_reparse_deferral);
6470   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6471   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6472   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6473   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6474   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6475   tcase_add_test(tc_basic, test_varying_buffer_fills);
6476 }
6477