xref: /freebsd/contrib/expat/tests/basic_tests.c (revision aa1a8ff2d6dbc51ef058f46f3db5a8bb77967145)
1 /* Tests in the "basic" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22    Licensed under the MIT license:
23 
24    Permission is  hereby granted,  free of charge,  to any  person obtaining
25    a  copy  of  this  software   and  associated  documentation  files  (the
26    "Software"),  to  deal in  the  Software  without restriction,  including
27    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28    distribute, sublicense, and/or sell copies of the Software, and to permit
29    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30    following conditions:
31 
32    The above copyright  notice and this permission notice  shall be included
33    in all copies or substantial portions of the Software.
34 
35    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41    USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43 
44 #if defined(NDEBUG)
45 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47 
48 #include <assert.h>
49 
50 #include <stdio.h>
51 #include <string.h>
52 #include <time.h>
53 
54 #if ! defined(__cplusplus)
55 #  include <stdbool.h>
56 #endif
57 
58 #include "expat_config.h"
59 
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69 
70 static void
71 basic_setup(void) {
72   g_parser = XML_ParserCreate(NULL);
73   if (g_parser == NULL)
74     fail("Parser not created.");
75 }
76 
77 /*
78  * Character & encoding tests.
79  */
80 
81 START_TEST(test_nul_byte) {
82   char text[] = "<doc>\0</doc>";
83 
84   /* test that a NUL byte (in US-ASCII data) is an error */
85   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86       == XML_STATUS_OK)
87     fail("Parser did not report error on NUL-byte.");
88   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89     xml_failure(g_parser);
90 }
91 END_TEST
92 
93 START_TEST(test_u0000_char) {
94   /* test that a NUL byte (in US-ASCII data) is an error */
95   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96                  "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99 
100 START_TEST(test_siphash_self) {
101   if (! sip24_valid())
102     fail("SipHash self-test failed");
103 }
104 END_TEST
105 
106 START_TEST(test_siphash_spec) {
107   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109                          "\x0a\x0b\x0c\x0d\x0e";
110   const size_t len = sizeof(message) - 1;
111   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112   struct siphash state;
113   struct sipkey key;
114 
115   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116                   "\x0a\x0b\x0c\x0d\x0e\x0f");
117   sip24_init(&state, &key);
118 
119   /* Cover spread across calls */
120   sip24_update(&state, message, 4);
121   sip24_update(&state, message + 4, len - 4);
122 
123   /* Cover null length */
124   sip24_update(&state, message, 0);
125 
126   if (sip24_final(&state) != expected)
127     fail("sip24_final failed spec test\n");
128 
129   /* Cover wrapper */
130   if (siphash24(message, len, &key) != expected)
131     fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134 
135 START_TEST(test_bom_utf8) {
136   /* This test is really just making sure we don't core on a UTF-8 BOM. */
137   const char *text = "\357\273\277<e/>";
138 
139   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140       == XML_STATUS_ERROR)
141     xml_failure(g_parser);
142 }
143 END_TEST
144 
145 START_TEST(test_bom_utf16_be) {
146   char text[] = "\376\377\0<\0e\0/\0>";
147 
148   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149       == XML_STATUS_ERROR)
150     xml_failure(g_parser);
151 }
152 END_TEST
153 
154 START_TEST(test_bom_utf16_le) {
155   char text[] = "\377\376<\0e\0/\0>\0";
156 
157   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158       == XML_STATUS_ERROR)
159     xml_failure(g_parser);
160 }
161 END_TEST
162 
163 START_TEST(test_nobom_utf16_le) {
164   char text[] = " \0<\0e\0/\0>\0";
165 
166   if (g_chunkSize == 1) {
167     // TODO: with just the first byte, we can't tell the difference between
168     // UTF-16-LE and UTF-8. Avoid the failure for now.
169     return;
170   }
171 
172   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173       == XML_STATUS_ERROR)
174     xml_failure(g_parser);
175 }
176 END_TEST
177 
178 START_TEST(test_hash_collision) {
179   /* For full coverage of the lookup routine, we need to ensure a
180    * hash collision even though we can only tell that we have one
181    * through breakpoint debugging or coverage statistics.  The
182    * following will cause a hash collision on machines with a 64-bit
183    * long type; others will have to experiment.  The full coverage
184    * tests invoked from qa.sh usually provide a hash collision, but
185    * not always.  This is an attempt to provide insurance.
186    */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188   const char *text
189       = "<doc>\n"
190         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195         "<d8>This triggers the table growth and collides with b2</d8>\n"
196         "</doc>\n";
197 
198   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200       == XML_STATUS_ERROR)
201     xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205 
206 /* Regression test for SF bug #491986. */
207 START_TEST(test_danish_latin1) {
208   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210 #ifdef XML_UNICODE
211   const XML_Char *expected
212       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213 #else
214   const XML_Char *expected
215       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216 #endif
217   run_character_check(text, expected);
218 }
219 END_TEST
220 
221 /* Regression test for SF bug #514281. */
222 START_TEST(test_french_charref_hexidecimal) {
223   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225 #ifdef XML_UNICODE
226   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227 #else
228   const XML_Char *expected
229       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230 #endif
231   run_character_check(text, expected);
232 }
233 END_TEST
234 
235 START_TEST(test_french_charref_decimal) {
236   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238 #ifdef XML_UNICODE
239   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240 #else
241   const XML_Char *expected
242       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243 #endif
244   run_character_check(text, expected);
245 }
246 END_TEST
247 
248 START_TEST(test_french_latin1) {
249   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251 #ifdef XML_UNICODE
252   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254   const XML_Char *expected
255       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257   run_character_check(text, expected);
258 }
259 END_TEST
260 
261 START_TEST(test_french_utf8) {
262   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263                      "<doc>\xC3\xA9</doc>";
264 #ifdef XML_UNICODE
265   const XML_Char *expected = XCS("\x00e9");
266 #else
267   const XML_Char *expected = XCS("\xC3\xA9");
268 #endif
269   run_character_check(text, expected);
270 }
271 END_TEST
272 
273 /* Regression test for SF bug #600479.
274    XXX There should be a test that exercises all legal XML Unicode
275    characters as PCDATA and attribute value content, and XML Name
276    characters as part of element and attribute names.
277 */
278 START_TEST(test_utf8_false_rejection) {
279   const char *text = "<doc>\xEF\xBA\xBF</doc>";
280 #ifdef XML_UNICODE
281   const XML_Char *expected = XCS("\xfebf");
282 #else
283   const XML_Char *expected = XCS("\xEF\xBA\xBF");
284 #endif
285   run_character_check(text, expected);
286 }
287 END_TEST
288 
289 /* Regression test for SF bug #477667.
290    This test assures that any 8-bit character followed by a 7-bit
291    character will not be mistakenly interpreted as a valid UTF-8
292    sequence.
293 */
294 START_TEST(test_illegal_utf8) {
295   char text[100];
296   int i;
297 
298   for (i = 128; i <= 255; ++i) {
299     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301         == XML_STATUS_OK) {
302       snprintf(text, sizeof(text),
303                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304                i);
305       fail(text);
306     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307       xml_failure(g_parser);
308     /* Reset the parser since we use the same parser repeatedly. */
309     XML_ParserReset(g_parser, NULL);
310   }
311 }
312 END_TEST
313 
314 /* Examples, not masks: */
315 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320 
321 START_TEST(test_utf8_auto_align) {
322   struct TestCase {
323     ptrdiff_t expectedMovementInChars;
324     const char *input;
325   };
326 
327   struct TestCase cases[] = {
328       {00, ""},
329 
330       {00, UTF8_LEAD_1},
331 
332       {-1, UTF8_LEAD_2},
333       {00, UTF8_LEAD_2 UTF8_FOLLOW},
334 
335       {-1, UTF8_LEAD_3},
336       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338 
339       {-1, UTF8_LEAD_4},
340       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343   };
344 
345   size_t i = 0;
346   bool success = true;
347   for (; i < sizeof(cases) / sizeof(*cases); i++) {
348     const char *fromLim = cases[i].input + strlen(cases[i].input);
349     const char *const fromLimInitially = fromLim;
350     ptrdiff_t actualMovementInChars;
351 
352     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353 
354     actualMovementInChars = (fromLim - fromLimInitially);
355     if (actualMovementInChars != cases[i].expectedMovementInChars) {
356       size_t j = 0;
357       success = false;
358       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359              ", actually moved by %2d chars: \"",
360              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361              (int)actualMovementInChars);
362       for (; j < strlen(cases[i].input); j++) {
363         printf("\\x%02x", (unsigned char)cases[i].input[j]);
364       }
365       printf("\"\n");
366     }
367   }
368 
369   if (! success) {
370     fail("UTF-8 auto-alignment is not bullet-proof\n");
371   }
372 }
373 END_TEST
374 
375 START_TEST(test_utf16) {
376   /* <?xml version="1.0" encoding="UTF-16"?>
377    *  <doc a='123'>some {A} text</doc>
378    *
379    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380    */
381   char text[]
382       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385         "\000'\000?\000>\000\n"
386         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388         "<\000/\000d\000o\000c\000>";
389 #ifdef XML_UNICODE
390   const XML_Char *expected = XCS("some \xff21 text");
391 #else
392   const XML_Char *expected = XCS("some \357\274\241 text");
393 #endif
394   CharData storage;
395 
396   CharData_Init(&storage);
397   XML_SetUserData(g_parser, &storage);
398   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400       == XML_STATUS_ERROR)
401     xml_failure(g_parser);
402   CharData_CheckXMLChars(&storage, expected);
403 }
404 END_TEST
405 
406 START_TEST(test_utf16_le_epilog_newline) {
407   unsigned int first_chunk_bytes = 17;
408   char text[] = "\xFF\xFE"                  /* BOM */
409                 "<\000e\000/\000>\000"      /* document element */
410                 "\r\000\n\000\r\000\n\000"; /* epilog */
411 
412   if (first_chunk_bytes >= sizeof(text) - 1)
413     fail("bad value of first_chunk_bytes");
414   if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415       == XML_STATUS_ERROR)
416     xml_failure(g_parser);
417   else {
418     enum XML_Status rc;
419     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420                                  sizeof(text) - first_chunk_bytes - 1,
421                                  XML_TRUE);
422     if (rc == XML_STATUS_ERROR)
423       xml_failure(g_parser);
424   }
425 }
426 END_TEST
427 
428 /* Test that an outright lie in the encoding is faulted */
429 START_TEST(test_not_utf16) {
430   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431                      "<doc>Hi</doc>";
432 
433   /* Use a handler to provoke the appropriate code paths */
434   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436                  "UTF-16 declared in UTF-8 not faulted");
437 }
438 END_TEST
439 
440 /* Test that an unknown encoding is rejected */
441 START_TEST(test_bad_encoding) {
442   const char *text = "<doc>Hi</doc>";
443 
444   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445     fail("XML_SetEncoding failed");
446   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447                  "Unknown encoding not faulted");
448 }
449 END_TEST
450 
451 /* Regression test for SF bug #481609, #774028. */
452 START_TEST(test_latin1_umlauts) {
453   const char *text
454       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457 #ifdef XML_UNICODE
458   /* Expected results in UTF-16 */
459   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461 #else
462   /* Expected results in UTF-8 */
463   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465 #endif
466 
467   run_character_check(text, expected);
468   XML_ParserReset(g_parser, NULL);
469   run_attribute_check(text, expected);
470   /* Repeat with a default handler */
471   XML_ParserReset(g_parser, NULL);
472   XML_SetDefaultHandler(g_parser, dummy_default_handler);
473   run_character_check(text, expected);
474   XML_ParserReset(g_parser, NULL);
475   XML_SetDefaultHandler(g_parser, dummy_default_handler);
476   run_attribute_check(text, expected);
477 }
478 END_TEST
479 
480 /* Test that an element name with a 4-byte UTF-8 character is rejected */
481 START_TEST(test_long_utf8_character) {
482   const char *text
483       = "<?xml version='1.0' encoding='utf-8'?>\n"
484         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485         "<do\xf0\x90\x80\x80/>";
486   expect_failure(text, XML_ERROR_INVALID_TOKEN,
487                  "4-byte UTF-8 character in element name not faulted");
488 }
489 END_TEST
490 
491 /* Test that a long latin-1 attribute (too long to convert in one go)
492  * is correctly converted
493  */
494 START_TEST(test_long_latin1_attribute) {
495   const char *text
496       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497         "<doc att='"
498         /* 64 characters per line */
499         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515         /* Last character splits across a buffer boundary */
516         "\xe4'>\n</doc>";
517 
518   const XML_Char *expected =
519       /* 64 characters per line */
520       /* clang-format off */
521         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537   /* clang-format on */
538 #ifdef XML_UNICODE
539                                                   XCS("\x00e4");
540 #else
541                                                   XCS("\xc3\xa4");
542 #endif
543 
544   run_attribute_check(text, expected);
545 }
546 END_TEST
547 
548 /* Test that a long ASCII attribute (too long to convert in one go)
549  * is correctly converted
550  */
551 START_TEST(test_long_ascii_attribute) {
552   const char *text
553       = "<?xml version='1.0' encoding='us-ascii'?>\n"
554         "<doc att='"
555         /* 64 characters per line */
556         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572         "01234'>\n</doc>";
573   const XML_Char *expected =
574       /* 64 characters per line */
575       /* clang-format off */
576         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592         XCS("01234");
593   /* clang-format on */
594 
595   run_attribute_check(text, expected);
596 }
597 END_TEST
598 
599 /* Regression test #1 for SF bug #653180. */
600 START_TEST(test_line_number_after_parse) {
601   const char *text = "<tag>\n"
602                      "\n"
603                      "\n</tag>";
604   XML_Size lineno;
605 
606   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607       == XML_STATUS_ERROR)
608     xml_failure(g_parser);
609   lineno = XML_GetCurrentLineNumber(g_parser);
610   if (lineno != 4) {
611     char buffer[100];
612     snprintf(buffer, sizeof(buffer),
613              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614     fail(buffer);
615   }
616 }
617 END_TEST
618 
619 /* Regression test #2 for SF bug #653180. */
620 START_TEST(test_column_number_after_parse) {
621   const char *text = "<tag></tag>";
622   XML_Size colno;
623 
624   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625       == XML_STATUS_ERROR)
626     xml_failure(g_parser);
627   colno = XML_GetCurrentColumnNumber(g_parser);
628   if (colno != 11) {
629     char buffer[100];
630     snprintf(buffer, sizeof(buffer),
631              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632     fail(buffer);
633   }
634 }
635 END_TEST
636 
637 /* Regression test #3 for SF bug #653180. */
638 START_TEST(test_line_and_column_numbers_inside_handlers) {
639   const char *text = "<a>\n"      /* Unix end-of-line */
640                      "  <b>\r\n"  /* Windows end-of-line */
641                      "    <c/>\r" /* Mac OS end-of-line */
642                      "  </b>\n"
643                      "  <d>\n"
644                      "    <f/>\n"
645                      "  </d>\n"
646                      "</a>";
647   const StructDataEntry expected[]
648       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
651          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
653   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654   StructData storage;
655 
656   StructData_Init(&storage);
657   XML_SetUserData(g_parser, &storage);
658   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661       == XML_STATUS_ERROR)
662     xml_failure(g_parser);
663 
664   StructData_CheckItems(&storage, expected, expected_count);
665   StructData_Dispose(&storage);
666 }
667 END_TEST
668 
669 /* Regression test #4 for SF bug #653180. */
670 START_TEST(test_line_number_after_error) {
671   const char *text = "<a>\n"
672                      "  <b>\n"
673                      "  </a>"; /* missing </b> */
674   XML_Size lineno;
675   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676       != XML_STATUS_ERROR)
677     fail("Expected a parse error");
678 
679   lineno = XML_GetCurrentLineNumber(g_parser);
680   if (lineno != 3) {
681     char buffer[100];
682     snprintf(buffer, sizeof(buffer),
683              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684     fail(buffer);
685   }
686 }
687 END_TEST
688 
689 /* Regression test #5 for SF bug #653180. */
690 START_TEST(test_column_number_after_error) {
691   const char *text = "<a>\n"
692                      "  <b>\n"
693                      "  </a>"; /* missing </b> */
694   XML_Size colno;
695   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696       != XML_STATUS_ERROR)
697     fail("Expected a parse error");
698 
699   colno = XML_GetCurrentColumnNumber(g_parser);
700   if (colno != 4) {
701     char buffer[100];
702     snprintf(buffer, sizeof(buffer),
703              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704     fail(buffer);
705   }
706 }
707 END_TEST
708 
709 /* Regression test for SF bug #478332. */
710 START_TEST(test_really_long_lines) {
711   /* This parses an input line longer than INIT_DATA_BUF_SIZE
712      characters long (defined to be 1024 in xmlparse.c).  We take a
713      really cheesy approach to building the input buffer, because
714      this avoids writing bugs in buffer-filling code.
715   */
716   const char *text
717       = "<e>"
718         /* 64 chars */
719         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720         /* until we have at least 1024 characters on the line: */
721         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737         "</e>";
738   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739       == XML_STATUS_ERROR)
740     xml_failure(g_parser);
741 }
742 END_TEST
743 
744 /* Test cdata processing across a buffer boundary */
745 START_TEST(test_really_long_encoded_lines) {
746   /* As above, except that we want to provoke an output buffer
747    * overflow with a non-trivial encoding.  For this we need to pass
748    * the whole cdata in one go, not byte-by-byte.
749    */
750   void *buffer;
751   const char *text
752       = "<?xml version='1.0' encoding='iso-8859-1'?>"
753         "<e>"
754         /* 64 chars */
755         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756         /* until we have at least 1024 characters on the line: */
757         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773         "</e>";
774   int parse_len = (int)strlen(text);
775 
776   /* Need a cdata handler to provoke the code path we want to test */
777   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778   buffer = XML_GetBuffer(g_parser, parse_len);
779   if (buffer == NULL)
780     fail("Could not allocate parse buffer");
781   assert(buffer != NULL);
782   memcpy(buffer, text, parse_len);
783   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784     xml_failure(g_parser);
785 }
786 END_TEST
787 
788 /*
789  * Element event tests.
790  */
791 
792 START_TEST(test_end_element_events) {
793   const char *text = "<a><b><c/></b><d><f/></d></a>";
794   const XML_Char *expected = XCS("/c/b/f/d/a");
795   CharData storage;
796 
797   CharData_Init(&storage);
798   XML_SetUserData(g_parser, &storage);
799   XML_SetEndElementHandler(g_parser, end_element_event_handler);
800   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801       == XML_STATUS_ERROR)
802     xml_failure(g_parser);
803   CharData_CheckXMLChars(&storage, expected);
804 }
805 END_TEST
806 
807 /*
808  * Attribute tests.
809  */
810 
/* Helper used by the following tests, which check any "attr", "ents"
   and "refs" attributes to make sure whitespace has been normalized.

   Return true if whitespace has been normalized in a string, using
   the rules for attribute value normalization.  The 'is_cdata' flag
   is needed since CDATA attributes don't need to have multiple
   whitespace characters collapsed to a single space, while other
   attribute data types do.  (Section 3.3.3 of the recommendation.)
*/
820 static int
821 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822   int blanks = 0;
823   int at_start = 1;
824   while (*s) {
825     if (*s == XCS(' '))
826       ++blanks;
827     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828       return 0;
829     else {
830       if (at_start) {
831         at_start = 0;
832         if (blanks && ! is_cdata)
833           /* illegal leading blanks */
834           return 0;
835       } else if (blanks > 1 && ! is_cdata)
836         return 0;
837       blanks = 0;
838     }
839     ++s;
840   }
841   if (blanks && ! is_cdata)
842     return 0;
843   return 1;
844 }
845 
846 /* Check the attribute whitespace checker: */
847 START_TEST(test_helper_is_whitespace_normalized) {
848   assert(is_whitespace_normalized(XCS("abc"), 0));
849   assert(is_whitespace_normalized(XCS("abc"), 1));
850   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
855   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
856   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858   assert(! is_whitespace_normalized(XCS(" "), 0));
859   assert(is_whitespace_normalized(XCS(" "), 1));
860   assert(! is_whitespace_normalized(XCS("\t"), 0));
861   assert(! is_whitespace_normalized(XCS("\t"), 1));
862   assert(! is_whitespace_normalized(XCS("\n"), 0));
863   assert(! is_whitespace_normalized(XCS("\n"), 1));
864   assert(! is_whitespace_normalized(XCS("\r"), 0));
865   assert(! is_whitespace_normalized(XCS("\r"), 1));
866   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867 }
868 END_TEST
869 
870 static void XMLCALL
871 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872                                           const XML_Char **atts) {
873   int i;
874   UNUSED_P(userData);
875   UNUSED_P(name);
876   for (i = 0; atts[i] != NULL; i += 2) {
877     const XML_Char *attrname = atts[i];
878     const XML_Char *value = atts[i + 1];
879     if (xcstrcmp(XCS("attr"), attrname) == 0
880         || xcstrcmp(XCS("ents"), attrname) == 0
881         || xcstrcmp(XCS("refs"), attrname) == 0) {
882       if (! is_whitespace_normalized(value, 0)) {
883         char buffer[256];
884         snprintf(buffer, sizeof(buffer),
885                  "attribute value not normalized: %" XML_FMT_STR
886                  "='%" XML_FMT_STR "'",
887                  attrname, value);
888         fail(buffer);
889       }
890     }
891   }
892 }
893 
894 START_TEST(test_attr_whitespace_normalization) {
895   const char *text
896       = "<!DOCTYPE doc [\n"
897         "  <!ATTLIST doc\n"
898         "            attr NMTOKENS #REQUIRED\n"
899         "            ents ENTITIES #REQUIRED\n"
900         "            refs IDREFS   #REQUIRED>\n"
901         "]>\n"
902         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
903         "     ents=' ent-1   \t\r\n"
904         "            ent-2  ' >\n"
905         "  <e id='id-1'/>\n"
906         "  <e id='id-2'/>\n"
907         "</doc>";
908 
909   XML_SetStartElementHandler(g_parser,
910                              check_attr_contains_normalized_whitespace);
911   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912       == XML_STATUS_ERROR)
913     xml_failure(g_parser);
914 }
915 END_TEST
916 
917 /*
918  * XML declaration tests.
919  */
920 
921 START_TEST(test_xmldecl_misplaced) {
922   expect_failure("\n"
923                  "<?xml version='1.0'?>\n"
924                  "<a/>",
925                  XML_ERROR_MISPLACED_XML_PI,
926                  "failed to report misplaced XML declaration");
927 }
928 END_TEST
929 
930 START_TEST(test_xmldecl_invalid) {
931   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932                  "Failed to report invalid XML declaration");
933 }
934 END_TEST
935 
936 START_TEST(test_xmldecl_missing_attr) {
937   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938                  "Failed to report missing XML declaration attribute");
939 }
940 END_TEST
941 
942 START_TEST(test_xmldecl_missing_value) {
943   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944                  "<doc/>",
945                  XML_ERROR_XML_DECL,
946                  "Failed to report missing attribute value");
947 }
948 END_TEST
949 
950 /* Regression test for SF bug #584832. */
951 START_TEST(test_unknown_encoding_internal_entity) {
952   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954                      "<test a='&foo;'/>";
955 
956   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958       == XML_STATUS_ERROR)
959     xml_failure(g_parser);
960 }
961 END_TEST
962 
963 /* Test unrecognised encoding handler */
964 START_TEST(test_unrecognised_encoding_internal_entity) {
965   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967                      "<test a='&foo;'/>";
968 
969   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971       != XML_STATUS_ERROR)
972     fail("Unrecognised encoding not rejected");
973 }
974 END_TEST
975 
976 /* Regression test for SF bug #620106. */
977 START_TEST(test_ext_entity_set_encoding) {
978   const char *text = "<!DOCTYPE doc [\n"
979                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980                      "]>\n"
981                      "<doc>&en;</doc>";
982   ExtTest test_data
983       = {/* This text says it's an unsupported encoding, but it's really
984             UTF-8, which we tell Expat using XML_SetEncoding().
985          */
986          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987 #ifdef XML_UNICODE
988   const XML_Char *expected = XCS("\x00e9");
989 #else
990   const XML_Char *expected = XCS("\xc3\xa9");
991 #endif
992 
993   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994   run_ext_character_check(text, &test_data, expected);
995 }
996 END_TEST
997 
998 /* Test external entities with no handler */
999 START_TEST(test_ext_entity_no_handler) {
1000   const char *text = "<!DOCTYPE doc [\n"
1001                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002                      "]>\n"
1003                      "<doc>&en;</doc>";
1004 
1005   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006   run_character_check(text, XCS(""));
1007 }
1008 END_TEST
1009 
1010 /* Test UTF-8 BOM is accepted */
1011 START_TEST(test_ext_entity_set_bom) {
1012   const char *text = "<!DOCTYPE doc [\n"
1013                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014                      "]>\n"
1015                      "<doc>&en;</doc>";
1016   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017                        "<?xml encoding='iso-8859-3'?>"
1018                        "\xC3\xA9",
1019                        XCS("utf-8"), NULL};
1020 #ifdef XML_UNICODE
1021   const XML_Char *expected = XCS("\x00e9");
1022 #else
1023   const XML_Char *expected = XCS("\xc3\xa9");
1024 #endif
1025 
1026   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027   run_ext_character_check(text, &test_data, expected);
1028 }
1029 END_TEST
1030 
1031 /* Test that bad encodings are faulted */
1032 START_TEST(test_ext_entity_bad_encoding) {
1033   const char *text = "<!DOCTYPE doc [\n"
1034                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035                      "]>\n"
1036                      "<doc>&en;</doc>";
1037   ExtFaults fault
1038       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040 
1041   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042   XML_SetUserData(g_parser, &fault);
1043   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044                  "Bad encoding should not have been accepted");
1045 }
1046 END_TEST
1047 
1048 /* Try handing an invalid encoding to an external entity parser */
1049 START_TEST(test_ext_entity_bad_encoding_2) {
1050   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052                      "<doc>&entity;</doc>";
1053   ExtFaults fault
1054       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056 
1057   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059   XML_SetUserData(g_parser, &fault);
1060   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061                  "Bad encoding not faulted in external entity handler");
1062 }
1063 END_TEST
1064 
1065 /* Test that no error is reported for unknown entities if we don't
1066    read an external subset.  This was fixed in Expat 1.95.5.
1067 */
1068 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070                      "<doc>&entity;</doc>";
1071 
1072   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073       == XML_STATUS_ERROR)
1074     xml_failure(g_parser);
1075 }
1076 END_TEST
1077 
1078 /* Test that an error is reported for unknown entities if we don't
1079    have an external subset.
1080 */
1081 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083                  "Parser did not report undefined entity w/out a DTD.");
1084 }
1085 END_TEST
1086 
1087 /* Test that an error is reported for unknown entities if we don't
1088    read an external subset, but have been declared standalone.
1089 */
1090 START_TEST(test_wfc_undeclared_entity_standalone) {
1091   const char *text
1092       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094         "<doc>&entity;</doc>";
1095 
1096   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097                  "Parser did not report undefined entity (standalone).");
1098 }
1099 END_TEST
1100 
1101 /* Test that an error is reported for unknown entities if we have read
1102    an external subset, and standalone is true.
1103 */
1104 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105   const char *text
1106       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108         "<doc>&entity;</doc>";
1109   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110 
1111   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112   XML_SetUserData(g_parser, &test_data);
1113   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115                  "Parser did not report undefined entity (external DTD).");
1116 }
1117 END_TEST
1118 
1119 /* Test that external entity handling is not done if the parsing flag
1120  * is set to UNLESS_STANDALONE
1121  */
1122 START_TEST(test_entity_with_external_subset_unless_standalone) {
1123   const char *text
1124       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126         "<doc>&entity;</doc>";
1127   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128 
1129   XML_SetParamEntityParsing(g_parser,
1130                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131   XML_SetUserData(g_parser, &test_data);
1132   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134                  "Parser did not report undefined entity");
1135 }
1136 END_TEST
1137 
1138 /* Test that no error is reported for unknown entities if we have read
1139    an external subset, and standalone is false.
1140 */
1141 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144                      "<doc>&entity;</doc>";
1145   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146 
1147   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149   run_ext_character_check(text, &test_data, XCS(""));
1150 }
1151 END_TEST
1152 
1153 /* Test that an error is reported if our NotStandalone handler fails */
1154 START_TEST(test_not_standalone_handler_reject) {
1155   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157                      "<doc>&entity;</doc>";
1158   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159 
1160   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161   XML_SetUserData(g_parser, &test_data);
1162   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165                  "NotStandalone handler failed to reject");
1166 
1167   /* Try again but without external entity handling */
1168   XML_ParserReset(g_parser, NULL);
1169   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171                  "NotStandalone handler failed to reject");
1172 }
1173 END_TEST
1174 
1175 /* Test that no error is reported if our NotStandalone handler succeeds */
1176 START_TEST(test_not_standalone_handler_accept) {
1177   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179                      "<doc>&entity;</doc>";
1180   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181 
1182   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185   run_ext_character_check(text, &test_data, XCS(""));
1186 
1187   /* Repeat without the external entity handler */
1188   XML_ParserReset(g_parser, NULL);
1189   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190   run_character_check(text, XCS(""));
1191 }
1192 END_TEST
1193 
1194 START_TEST(test_wfc_no_recursive_entity_refs) {
1195   const char *text = "<!DOCTYPE doc [\n"
1196                      "  <!ENTITY entity '&#38;entity;'>\n"
1197                      "]>\n"
1198                      "<doc>&entity;</doc>";
1199 
1200   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201                  "Parser did not report recursive entity reference.");
1202 }
1203 END_TEST
1204 
1205 /* Test incomplete external entities are faulted */
1206 START_TEST(test_ext_entity_invalid_parse) {
1207   const char *text = "<!DOCTYPE doc [\n"
1208                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1209                      "]>\n"
1210                      "<doc>&en;</doc>";
1211   const ExtFaults faults[]
1212       = {{"<", "Incomplete element declaration not faulted", NULL,
1213           XML_ERROR_UNCLOSED_TOKEN},
1214          {"<\xe2\x82", /* First two bytes of a three-byte char */
1215           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1216          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1217           XML_ERROR_PARTIAL_CHAR},
1218          {NULL, NULL, NULL, XML_ERROR_NONE}};
1219   const ExtFaults *fault = faults;
1220 
1221   for (; fault->parse_text != NULL; fault++) {
1222     set_subtest("\"%s\"", fault->parse_text);
1223     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1224     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1225     XML_SetUserData(g_parser, (void *)fault);
1226     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1227                    "Parser did not report external entity error");
1228     XML_ParserReset(g_parser, NULL);
1229   }
1230 }
1231 END_TEST
1232 
1233 /* Regression test for SF bug #483514. */
1234 START_TEST(test_dtd_default_handling) {
1235   const char *text = "<!DOCTYPE doc [\n"
1236                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1237                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1238                      "<!ELEMENT doc EMPTY>\n"
1239                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1240                      "<?pi in dtd?>\n"
1241                      "<!--comment in dtd-->\n"
1242                      "]><doc/>";
1243 
1244   XML_SetDefaultHandler(g_parser, accumulate_characters);
1245   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1246   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1247   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1248   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1249   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1250   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1251   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1252   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1253   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1254   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1255   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1256 }
1257 END_TEST
1258 
1259 /* Test handling of attribute declarations */
1260 START_TEST(test_dtd_attr_handling) {
1261   const char *prolog = "<!DOCTYPE doc [\n"
1262                        "<!ELEMENT doc EMPTY>\n";
1263   AttTest attr_data[]
1264       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1265           "]>"
1266           "<doc a='two'/>",
1267           XCS("doc"), XCS("a"),
1268           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1269           NULL, XML_TRUE},
1270          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1271           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1272           "]>"
1273           "<doc/>",
1274           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1275          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1276           "]>"
1277           "<doc/>",
1278           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1279          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1280           "]>"
1281           "<doc/>",
1282           XCS("doc"), XCS("a"), XCS("CDATA"),
1283 #ifdef XML_UNICODE
1284           XCS("\x06f2"),
1285 #else
1286           XCS("\xdb\xb2"),
1287 #endif
1288           XML_FALSE},
1289          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1290   AttTest *test;
1291 
1292   for (test = attr_data; test->definition != NULL; test++) {
1293     set_subtest("%s", test->definition);
1294     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1295     XML_SetUserData(g_parser, test);
1296     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1297                                 XML_FALSE)
1298         == XML_STATUS_ERROR)
1299       xml_failure(g_parser);
1300     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1301                                 (int)strlen(test->definition), XML_TRUE)
1302         == XML_STATUS_ERROR)
1303       xml_failure(g_parser);
1304     XML_ParserReset(g_parser, NULL);
1305   }
1306 }
1307 END_TEST
1308 
1309 /* See related SF bug #673791.
1310    When namespace processing is enabled, setting the namespace URI for
1311    a prefix is not allowed; this test ensures that it *is* allowed
1312    when namespace processing is not enabled.
1313    (See Namespaces in XML, section 2.)
1314 */
1315 START_TEST(test_empty_ns_without_namespaces) {
1316   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1317                      "  <e xmlns:prefix=''/>\n"
1318                      "</doc>";
1319 
1320   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1321       == XML_STATUS_ERROR)
1322     xml_failure(g_parser);
1323 }
1324 END_TEST
1325 
1326 /* Regression test for SF bug #824420.
1327    Checks that an xmlns:prefix attribute set in an attribute's default
1328    value isn't misinterpreted.
1329 */
1330 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1331   const char *text = "<!DOCTYPE e:element [\n"
1332                      "  <!ATTLIST e:element\n"
1333                      "    xmlns:e CDATA 'http://example.org/'>\n"
1334                      "      ]>\n"
1335                      "<e:element/>";
1336 
1337   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1338       == XML_STATUS_ERROR)
1339     xml_failure(g_parser);
1340 }
1341 END_TEST
1342 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
1345 START_TEST(test_stop_parser_between_char_data_calls) {
1346   /* The sample data must be big enough that there are two calls to
1347      the character data handler from within the inner "for" loop of
1348      the XML_TOK_DATA_CHARS case in doContent(), and the character
1349      handler must stop the parser and clear the character data
1350      handler.
1351   */
1352   const char *text = long_character_data_text;
1353 
1354   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1355   g_resumable = XML_FALSE;
1356   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1357       != XML_STATUS_ERROR)
1358     xml_failure(g_parser);
1359   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1360     xml_failure(g_parser);
1361 }
1362 END_TEST
1363 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
1366 START_TEST(test_suspend_parser_between_char_data_calls) {
1367   /* The sample data must be big enough that there are two calls to
1368      the character data handler from within the inner "for" loop of
1369      the XML_TOK_DATA_CHARS case in doContent(), and the character
1370      handler must stop the parser and clear the character data
1371      handler.
1372   */
1373   const char *text = long_character_data_text;
1374 
1375   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1376   g_resumable = XML_TRUE;
1377   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1378       != XML_STATUS_SUSPENDED)
1379     xml_failure(g_parser);
1380   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1381     xml_failure(g_parser);
1382   /* Try parsing directly */
1383   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1384       != XML_STATUS_ERROR)
1385     fail("Attempt to continue parse while suspended not faulted");
1386   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1387     fail("Suspended parse not faulted with correct error");
1388 }
1389 END_TEST
1390 
1391 /* Test repeated calls to XML_StopParser are handled correctly */
1392 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1393   const char *text = long_character_data_text;
1394 
1395   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1396   g_resumable = XML_FALSE;
1397   g_abortable = XML_FALSE;
1398   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1399       != XML_STATUS_ERROR)
1400     fail("Failed to double-stop parser");
1401 
1402   XML_ParserReset(g_parser, NULL);
1403   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1404   g_resumable = XML_TRUE;
1405   g_abortable = XML_FALSE;
1406   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1407       != XML_STATUS_SUSPENDED)
1408     fail("Failed to double-suspend parser");
1409 
1410   XML_ParserReset(g_parser, NULL);
1411   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1412   g_resumable = XML_TRUE;
1413   g_abortable = XML_TRUE;
1414   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1415       != XML_STATUS_ERROR)
1416     fail("Failed to suspend-abort parser");
1417 }
1418 END_TEST
1419 
1420 START_TEST(test_good_cdata_ascii) {
1421   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1422   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1423 
1424   CharData storage;
1425   CharData_Init(&storage);
1426   XML_SetUserData(g_parser, &storage);
1427   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1428   /* Add start and end handlers for coverage */
1429   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1430   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1431 
1432   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1433       == XML_STATUS_ERROR)
1434     xml_failure(g_parser);
1435   CharData_CheckXMLChars(&storage, expected);
1436 
1437   /* Try again, this time with a default handler */
1438   XML_ParserReset(g_parser, NULL);
1439   CharData_Init(&storage);
1440   XML_SetUserData(g_parser, &storage);
1441   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1442   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1443 
1444   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1445       == XML_STATUS_ERROR)
1446     xml_failure(g_parser);
1447   CharData_CheckXMLChars(&storage, expected);
1448 }
1449 END_TEST
1450 
1451 START_TEST(test_good_cdata_utf16) {
1452   /* Test data is:
1453    *   <?xml version='1.0' encoding='utf-16'?>
1454    *   <a><![CDATA[hello]]></a>
1455    */
1456   const char text[]
1457       = "\0<\0?\0x\0m\0l\0"
1458         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1459         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1460         "1\0"
1461         "6\0'"
1462         "\0?\0>\0\n"
1463         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1464   const XML_Char *expected = XCS("hello");
1465 
1466   CharData storage;
1467   CharData_Init(&storage);
1468   XML_SetUserData(g_parser, &storage);
1469   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1470 
1471   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1472       == XML_STATUS_ERROR)
1473     xml_failure(g_parser);
1474   CharData_CheckXMLChars(&storage, expected);
1475 }
1476 END_TEST
1477 
1478 START_TEST(test_good_cdata_utf16_le) {
1479   /* Test data is:
1480    *   <?xml version='1.0' encoding='utf-16'?>
1481    *   <a><![CDATA[hello]]></a>
1482    */
1483   const char text[]
1484       = "<\0?\0x\0m\0l\0"
1485         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1486         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1487         "1\0"
1488         "6\0'"
1489         "\0?\0>\0\n"
1490         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1491   const XML_Char *expected = XCS("hello");
1492 
1493   CharData storage;
1494   CharData_Init(&storage);
1495   XML_SetUserData(g_parser, &storage);
1496   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1497 
1498   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1499       == XML_STATUS_ERROR)
1500     xml_failure(g_parser);
1501   CharData_CheckXMLChars(&storage, expected);
1502 }
1503 END_TEST
1504 
1505 /* Test UTF16 conversion of a long cdata string */
1506 
/* 16 characters: handy macro to reduce visual clutter.  Each of the
   letters A to P is written as a zero byte followed by the letter
   itself, so the literal is 32 bytes long before its terminator. */
#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1509 
1510 START_TEST(test_long_cdata_utf16) {
1511   /* Test data is:
1512    * <?xlm version='1.0' encoding='utf-16'?>
1513    * <a><![CDATA[
1514    * ABCDEFGHIJKLMNOP
1515    * ]]></a>
1516    */
1517   const char text[]
1518       = "\0<\0?\0x\0m\0l\0 "
1519         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1520         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1521         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1522       /* 64 characters per line */
1523       /* clang-format off */
1524         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1525         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1526         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1527         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1528         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1529         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1530         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1531         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1532         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1533         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1534         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1535         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1536         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1537         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1538         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1539         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1540         A_TO_P_IN_UTF16
1541         /* clang-format on */
1542         "\0]\0]\0>\0<\0/\0a\0>";
1543   const XML_Char *expected =
1544       /* clang-format off */
1545         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1546         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1547         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1548         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1549         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1550         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1551         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1552         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1553         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1554         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1555         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1556         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1557         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1558         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1559         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1560         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1561         XCS("ABCDEFGHIJKLMNOP");
1562   /* clang-format on */
1563   CharData storage;
1564   void *buffer;
1565 
1566   CharData_Init(&storage);
1567   XML_SetUserData(g_parser, &storage);
1568   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1569   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1570   if (buffer == NULL)
1571     fail("Could not allocate parse buffer");
1572   assert(buffer != NULL);
1573   memcpy(buffer, text, sizeof(text) - 1);
1574   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1575     xml_failure(g_parser);
1576   CharData_CheckXMLChars(&storage, expected);
1577 }
1578 END_TEST
1579 
1580 /* Test handling of multiple unit UTF-16 characters */
1581 START_TEST(test_multichar_cdata_utf16) {
1582   /* Test data is:
1583    *   <?xml version='1.0' encoding='utf-16'?>
1584    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1585    *
1586    * where {MINIM} is U+1d15e (a minim or half-note)
1587    *   UTF-16: 0xd834 0xdd5e
1588    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1589    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1590    *   UTF-16: 0xd834 0xdd5f
1591    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1592    */
1593   const char text[] = "\0<\0?\0x\0m\0l\0"
1594                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1595                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1596                       "1\0"
1597                       "6\0'"
1598                       "\0?\0>\0\n"
1599                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1600                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1601                       "\0]\0]\0>\0<\0/\0a\0>";
1602 #ifdef XML_UNICODE
1603   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1604 #else
1605   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1606 #endif
1607   CharData storage;
1608 
1609   CharData_Init(&storage);
1610   XML_SetUserData(g_parser, &storage);
1611   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612 
1613   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1614       == XML_STATUS_ERROR)
1615     xml_failure(g_parser);
1616   CharData_CheckXMLChars(&storage, expected);
1617 }
1618 END_TEST
1619 
/* Test that a reversed UTF-16 surrogate pair in CDATA content is rejected */
1621 START_TEST(test_utf16_bad_surrogate_pair) {
1622   /* Test data is:
1623    *   <?xml version='1.0' encoding='utf-16'?>
1624    *   <a><![CDATA[{BADLINB}]]></a>
1625    *
1626    * where {BADLINB} is U+10000 (the first Linear B character)
1627    * with the UTF-16 surrogate pair in the wrong order, i.e.
1628    *   0xdc00 0xd800
1629    */
1630   const char text[] = "\0<\0?\0x\0m\0l\0"
1631                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1632                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1633                       "1\0"
1634                       "6\0'"
1635                       "\0?\0>\0\n"
1636                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1637                       "\xdc\x00\xd8\x00"
1638                       "\0]\0]\0>\0<\0/\0a\0>";
1639 
1640   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1641       != XML_STATUS_ERROR)
1642     fail("Reversed UTF-16 surrogate pair not faulted");
1643   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1644     xml_failure(g_parser);
1645 }
1646 END_TEST
1647 
1648 START_TEST(test_bad_cdata) {
1649   struct CaseData {
1650     const char *text;
1651     enum XML_Error expectedError;
1652   };
1653 
1654   struct CaseData cases[]
1655       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1656          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1657          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1658          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1659          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1660          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1661          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1662          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1663 
1664          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1665          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1666          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1667 
1668          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1669          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1670          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1671          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1672          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1673          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1674          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1675 
1676          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1677          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1678          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1679 
1680   size_t i = 0;
1681   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1682     set_subtest("%s", cases[i].text);
1683     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1684         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1685     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1686 
1687     assert(actualStatus == XML_STATUS_ERROR);
1688 
1689     if (actualError != cases[i].expectedError) {
1690       char message[100];
1691       snprintf(message, sizeof(message),
1692                "Expected error %d but got error %d for case %u: \"%s\"\n",
1693                cases[i].expectedError, actualError, (unsigned int)i + 1,
1694                cases[i].text);
1695       fail(message);
1696     }
1697 
1698     XML_ParserReset(g_parser, NULL);
1699   }
1700 }
1701 END_TEST
1702 
1703 /* Test failures in UTF-16 CDATA */
1704 START_TEST(test_bad_cdata_utf16) {
1705   struct CaseData {
1706     size_t text_bytes;
1707     const char *text;
1708     enum XML_Error expected_error;
1709   };
1710 
1711   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1712                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1713                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1714                         "1\0"
1715                         "6\0'"
1716                         "\0?\0>\0\n"
1717                         "\0<\0a\0>";
1718   struct CaseData cases[] = {
1719       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1720       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1721       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1722       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1723       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1724       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1725       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1726       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1727       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1728       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1729       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1730       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1731       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1732       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1733       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1734       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1735       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1736       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1737       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1738       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1739       /* Now add a four-byte UTF-16 character */
1740       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1741        XML_ERROR_UNCLOSED_CDATA_SECTION},
1742       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1743       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1744        XML_ERROR_PARTIAL_CHAR},
1745       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1746        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1747   size_t i;
1748 
1749   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1750     set_subtest("case %lu", (long unsigned)(i + 1));
1751     enum XML_Status actual_status;
1752     enum XML_Error actual_error;
1753 
1754     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1755                                 XML_FALSE)
1756         == XML_STATUS_ERROR)
1757       xml_failure(g_parser);
1758     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1759                                             (int)cases[i].text_bytes, XML_TRUE);
1760     assert(actual_status == XML_STATUS_ERROR);
1761     actual_error = XML_GetErrorCode(g_parser);
1762     if (actual_error != cases[i].expected_error) {
1763       char message[1024];
1764 
1765       snprintf(message, sizeof(message),
1766                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1767                ") for case %lu\n",
1768                cases[i].expected_error,
1769                XML_ErrorString(cases[i].expected_error), actual_error,
1770                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1771       fail(message);
1772     }
1773     XML_ParserReset(g_parser, NULL);
1774   }
1775 }
1776 END_TEST
1777 
1778 /* Test stopping the parser in cdata handler */
1779 START_TEST(test_stop_parser_between_cdata_calls) {
1780   const char *text = long_cdata_text;
1781 
1782   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1783   g_resumable = XML_FALSE;
1784   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1785 }
1786 END_TEST
1787 
1788 /* Test suspending the parser in cdata handler */
1789 START_TEST(test_suspend_parser_between_cdata_calls) {
1790   const char *text = long_cdata_text;
1791   enum XML_Status result;
1792 
1793   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1794   g_resumable = XML_TRUE;
1795   result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1796   if (result != XML_STATUS_SUSPENDED) {
1797     if (result == XML_STATUS_ERROR)
1798       xml_failure(g_parser);
1799     fail("Parse not suspended in CDATA handler");
1800   }
1801   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1802     xml_failure(g_parser);
1803 }
1804 END_TEST
1805 
1806 /* Test memory allocation functions */
1807 START_TEST(test_memory_allocation) {
1808   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1809   char *p;
1810 
1811   if (buffer == NULL) {
1812     fail("Allocation failed");
1813   } else {
1814     /* Try writing to memory; some OSes try to cheat! */
1815     buffer[0] = 'T';
1816     buffer[1] = 'E';
1817     buffer[2] = 'S';
1818     buffer[3] = 'T';
1819     buffer[4] = '\0';
1820     if (strcmp(buffer, "TEST") != 0) {
1821       fail("Memory not writable");
1822     } else {
1823       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1824       if (p == NULL) {
1825         fail("Reallocation failed");
1826       } else {
1827         /* Write again, just to be sure */
1828         buffer = p;
1829         buffer[0] = 'V';
1830         if (strcmp(buffer, "VEST") != 0) {
1831           fail("Reallocated memory not writable");
1832         }
1833       }
1834     }
1835     XML_MemFree(g_parser, buffer);
1836   }
1837 }
1838 END_TEST
1839 
1840 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current) {
  /* Two inputs are used: a plain document whose content is the five
   * characters "hell]", and a document with an internal entity that is
   * referenced from the content.
   *
   * Every subtest records the handler invocations into a
   * handler_record_list and then checks, call by call, which handler ran
   * and with which integer argument.  For the default and cdata handlers
   * that argument is the length they were called with (see the "same len
   * argument" check in the first subtest).
   */
  const char *text = "<doc>hell]</doc>";
  const char *entity_text = "<!DOCTYPE doc [\n"
                            "<!ENTITY entity '&#37;'>\n"
                            "]>\n"
                            "<doc>&entity;</doc>";

  /* The cdata handler used here is expected to be followed by a default
   * handler call of the same length for each chunk (presumably because it
   * forwards via XML_DefaultCurrent() -- its definition is not in this
   * part of the file).
   */
  set_subtest("with defaulting");
  {
    struct handler_record_list storage;
    storage.count = 0;
    XML_SetDefaultHandler(g_parser, record_default_handler);
    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
    XML_SetUserData(g_parser, &storage);
    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
        == XML_STATUS_ERROR)
      xml_failure(g_parser);
    int i = 0;
    /* Length 5 matches the start tag "<doc>" */
    assert_record_handler_called(&storage, i++, "record_default_handler", 5);
    // we should have gotten one or more cdata callbacks, totaling 5 chars
    int cdata_len_remaining = 5;
    while (cdata_len_remaining > 0) {
      const struct handler_record_entry *c_entry
          = handler_record_get(&storage, i++);
      assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
      assert_true(c_entry->arg > 0);
      assert_true(c_entry->arg <= cdata_len_remaining);
      cdata_len_remaining -= c_entry->arg;
      // default handler must follow, with the exact same len argument.
      assert_record_handler_called(&storage, i++, "record_default_handler",
                                   c_entry->arg);
    }
    /* Length 6 matches the end tag "</doc>"; nothing else may be recorded */
    assert_record_handler_called(&storage, i++, "record_default_handler", 6);
    assert_true(storage.count == i);
  }

  /* Again, without the defaulting */
  /* Same document, but with a cdata handler after which no default
   * handler call is expected: only the two tags reach the default handler.
   */
  set_subtest("no defaulting");
  {
    struct handler_record_list storage;
    storage.count = 0;
    XML_ParserReset(g_parser, NULL);
    XML_SetDefaultHandler(g_parser, record_default_handler);
    XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
    XML_SetUserData(g_parser, &storage);
    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
        == XML_STATUS_ERROR)
      xml_failure(g_parser);
    int i = 0;
    assert_record_handler_called(&storage, i++, "record_default_handler", 5);
    // we should have gotten one or more cdata callbacks, totaling 5 chars
    int cdata_len_remaining = 5;
    while (cdata_len_remaining > 0) {
      const struct handler_record_entry *c_entry
          = handler_record_get(&storage, i++);
      assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
      assert_true(c_entry->arg > 0);
      assert_true(c_entry->arg <= cdata_len_remaining);
      cdata_len_remaining -= c_entry->arg;
    }
    assert_record_handler_called(&storage, i++, "record_default_handler", 6);
    assert_true(storage.count == i);
  }

  /* Now with an internal entity to complicate matters */
  /* The expected lengths in this and the following subtests line up, in
   * order, with these pieces of entity_text:
   *   [0] "<!DOCTYPE" 9   [1] " " 1        [2] "doc" 3      [3] " " 1
   *   [4] "[" 1           [5] "\n" 1       [6] "<!ENTITY" 8 [7] " " 1
   *   [8] "entity" 6      [9] " " 1        [10] "'&#37;'" 7 [11] ">" 1
   *   [12] "\n" 1         [13] "]" 1       [14] ">" 1       [15] "\n" 1
   *   [16] "<doc>" 5      [17] "&entity;" 8                 [18] "</doc>" 6
   * Entries [0]..[16] are identical in every subtest below; the subtests
   * differ only in what is recorded for the entity reference.
   */
  set_subtest("with internal entity");
  {
    struct handler_record_list storage;
    storage.count = 0;
    XML_ParserReset(g_parser, NULL);
    XML_SetDefaultHandler(g_parser, record_default_handler);
    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
    XML_SetUserData(g_parser, &storage);
    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
                                XML_TRUE)
        == XML_STATUS_ERROR)
      xml_failure(g_parser);
    /* The default handler suppresses the entity */
    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
    /* The unexpanded reference (8 characters) goes to the default handler;
     * no cdata handler call is recorded.
     */
    assert_record_handler_called(&storage, 17, "record_default_handler", 8);
    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
    assert_true(storage.count == 19);
  }

  /* Again, with a skip handler */
  set_subtest("with skip handler");
  {
    struct handler_record_list storage;
    storage.count = 0;
    XML_ParserReset(g_parser, NULL);
    XML_SetDefaultHandler(g_parser, record_default_handler);
    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
    XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
    XML_SetUserData(g_parser, &storage);
    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
                                XML_TRUE)
        == XML_STATUS_ERROR)
      xml_failure(g_parser);
    /* The default handler suppresses the entity */
    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
    /* With a skipped-entity handler installed, it is recorded for the
     * reference in place of the default handler.
     */
    assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
    assert_true(storage.count == 19);
  }

  /* This time, allow the entity through */
  /* The default handler is installed with XML_SetDefaultHandlerExpand()
   * here, and the entity's single replacement character shows up as a
   * cdata call of length 1 followed by a default call of length 1.
   */
  set_subtest("allow entity");
  {
    struct handler_record_list storage;
    storage.count = 0;
    XML_ParserReset(g_parser, NULL);
    XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
    XML_SetUserData(g_parser, &storage);
    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
                                XML_TRUE)
        == XML_STATUS_ERROR)
      xml_failure(g_parser);
    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
    assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
    assert_record_handler_called(&storage, 18, "record_default_handler", 1);
    assert_record_handler_called(&storage, 19, "record_default_handler", 6);
    assert_true(storage.count == 20);
  }

  /* Finally, without passing the cdata to the default handler */
  /* Same as the previous subtest, except that no default handler call is
   * expected after the cdata call, hence one record fewer.
   */
  set_subtest("not passing cdata");
  {
    struct handler_record_list storage;
    storage.count = 0;
    XML_ParserReset(g_parser, NULL);
    XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
    XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
    XML_SetUserData(g_parser, &storage);
    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
                                XML_TRUE)
        == XML_STATUS_ERROR)
      xml_failure(g_parser);
    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
    assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
                                 1);
    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
    assert_true(storage.count == 19);
  }
}
END_TEST
2051 
2052 /* Test DTD element parsing code paths */
2053 START_TEST(test_dtd_elements) {
2054   const char *text = "<!DOCTYPE doc [\n"
2055                      "<!ELEMENT doc (chapter)>\n"
2056                      "<!ELEMENT chapter (#PCDATA)>\n"
2057                      "]>\n"
2058                      "<doc><chapter>Wombats are go</chapter></doc>";
2059 
2060   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2061   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2062       == XML_STATUS_ERROR)
2063     xml_failure(g_parser);
2064 }
2065 END_TEST
2066 
2067 static void XMLCALL
2068 element_decl_check_model(void *userData, const XML_Char *name,
2069                          XML_Content *model) {
2070   UNUSED_P(userData);
2071   uint32_t errorFlags = 0;
2072 
2073   /* Expected model array structure is this:
2074    * [0] (type 6, quant 0)
2075    *   [1] (type 5, quant 0)
2076    *     [3] (type 4, quant 0, name "bar")
2077    *     [4] (type 4, quant 0, name "foo")
2078    *     [5] (type 4, quant 3, name "xyz")
2079    *   [2] (type 4, quant 2, name "zebra")
2080    */
2081   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2082   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2083 
2084   if (model != NULL) {
2085     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2086     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2087     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2088     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2089     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2090 
2091     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2092     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2093     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2094     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2095     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2096 
2097     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2098     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2099     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2100     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2101     errorFlags
2102         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2103 
2104     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2105     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2106     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2107     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2108     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2109 
2110     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2111     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2112     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2113     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2114     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2115 
2116     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2117     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2118     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2119     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2120     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2121   }
2122 
2123   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2124   XML_FreeContentModel(g_parser, model);
2125 }
2126 
2127 START_TEST(test_dtd_elements_nesting) {
2128   // Payload inspired by a test in Perl's XML::Parser
2129   const char *text = "<!DOCTYPE foo [\n"
2130                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2131                      "]>\n"
2132                      "<foo/>";
2133 
2134   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2135 
2136   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2137   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2138       == XML_STATUS_ERROR)
2139     xml_failure(g_parser);
2140 
2141   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2142     fail("Element declaration model regression detected");
2143 }
2144 END_TEST
2145 
2146 /* Test foreign DTD handling */
2147 START_TEST(test_set_foreign_dtd) {
2148   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2149   const char *text2 = "<doc>&entity;</doc>";
2150   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2151 
2152   /* Check hash salt is passed through too */
2153   XML_SetHashSalt(g_parser, 0x12345678);
2154   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2155   XML_SetUserData(g_parser, &test_data);
2156   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2157   /* Add a default handler to exercise more code paths */
2158   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2159   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2160     fail("Could not set foreign DTD");
2161   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2162       == XML_STATUS_ERROR)
2163     xml_failure(g_parser);
2164 
2165   /* Ensure that trying to set the DTD after parsing has started
2166    * is faulted, even if it's the same setting.
2167    */
2168   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2169       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2170     fail("Failed to reject late foreign DTD setting");
2171   /* Ditto for the hash salt */
2172   if (XML_SetHashSalt(g_parser, 0x23456789))
2173     fail("Failed to reject late hash salt change");
2174 
2175   /* Now finish the parse */
2176   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2177       == XML_STATUS_ERROR)
2178     xml_failure(g_parser);
2179 }
2180 END_TEST
2181 
2182 /* Test foreign DTD handling with a failing NotStandalone handler */
2183 START_TEST(test_foreign_dtd_not_standalone) {
2184   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2185                      "<doc>&entity;</doc>";
2186   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2187 
2188   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2189   XML_SetUserData(g_parser, &test_data);
2190   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2191   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2192   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2193     fail("Could not set foreign DTD");
2194   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2195                  "NotStandalonehandler failed to reject");
2196 }
2197 END_TEST
2198 
2199 /* Test invalid character in a foreign DTD is faulted */
2200 START_TEST(test_invalid_foreign_dtd) {
2201   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2202                      "<doc>&entity;</doc>";
2203   ExtFaults test_data
2204       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2205 
2206   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2207   XML_SetUserData(g_parser, &test_data);
2208   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2209   XML_UseForeignDTD(g_parser, XML_TRUE);
2210   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2211                  "Bad DTD should not have been accepted");
2212 }
2213 END_TEST
2214 
2215 /* Test foreign DTD use with a doctype */
2216 START_TEST(test_foreign_dtd_with_doctype) {
2217   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2218                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2219   const char *text2 = "<doc>&entity;</doc>";
2220   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2221 
2222   /* Check hash salt is passed through too */
2223   XML_SetHashSalt(g_parser, 0x12345678);
2224   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2225   XML_SetUserData(g_parser, &test_data);
2226   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2227   /* Add a default handler to exercise more code paths */
2228   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2229   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2230     fail("Could not set foreign DTD");
2231   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2232       == XML_STATUS_ERROR)
2233     xml_failure(g_parser);
2234 
2235   /* Ensure that trying to set the DTD after parsing has started
2236    * is faulted, even if it's the same setting.
2237    */
2238   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2239       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2240     fail("Failed to reject late foreign DTD setting");
2241   /* Ditto for the hash salt */
2242   if (XML_SetHashSalt(g_parser, 0x23456789))
2243     fail("Failed to reject late hash salt change");
2244 
2245   /* Now finish the parse */
2246   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2247       == XML_STATUS_ERROR)
2248     xml_failure(g_parser);
2249 }
2250 END_TEST
2251 
2252 /* Test XML_UseForeignDTD with no external subset present */
2253 START_TEST(test_foreign_dtd_without_external_subset) {
2254   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2255                      "<doc>&foo;</doc>";
2256 
2257   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2258   XML_SetUserData(g_parser, NULL);
2259   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2260   XML_UseForeignDTD(g_parser, XML_TRUE);
2261   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2262       == XML_STATUS_ERROR)
2263     xml_failure(g_parser);
2264 }
2265 END_TEST
2266 
2267 START_TEST(test_empty_foreign_dtd) {
2268   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2269                      "<doc>&entity;</doc>";
2270 
2271   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2272   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2273   XML_UseForeignDTD(g_parser, XML_TRUE);
2274   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2275                  "Undefined entity not faulted");
2276 }
2277 END_TEST
2278 
2279 /* Test XML Base is set and unset appropriately */
2280 START_TEST(test_set_base) {
2281   const XML_Char *old_base;
2282   const XML_Char *new_base = XCS("/local/file/name.xml");
2283 
2284   old_base = XML_GetBase(g_parser);
2285   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2286     fail("Unable to set base");
2287   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2288     fail("Base setting not correct");
2289   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2290     fail("Unable to NULL base");
2291   if (XML_GetBase(g_parser) != NULL)
2292     fail("Base setting not nulled");
2293   XML_SetBase(g_parser, old_base);
2294 }
2295 END_TEST
2296 
2297 /* Test attribute counts, indexing, etc */
2298 START_TEST(test_attributes) {
2299   const char *text = "<!DOCTYPE doc [\n"
2300                      "<!ELEMENT doc (tag)>\n"
2301                      "<!ATTLIST doc id ID #REQUIRED>\n"
2302                      "]>"
2303                      "<doc a='1' id='one' b='2'>"
2304                      "<tag c='3'/>"
2305                      "</doc>";
2306   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2307                          {XCS("b"), XCS("2")},
2308                          {XCS("id"), XCS("one")},
2309                          {NULL, NULL}};
2310   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2311   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2312                         {XCS("tag"), 1, NULL, NULL},
2313                         {NULL, 0, NULL, NULL}};
2314   info[0].attributes = doc_info;
2315   info[1].attributes = tag_info;
2316 
2317   XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2318   XML_SetUserData(g_parser, info);
2319   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2320       == XML_STATUS_ERROR)
2321     xml_failure(g_parser);
2322 }
2323 END_TEST
2324 
2325 /* Test reset works correctly in the middle of processing an internal
2326  * entity.  Exercises some obscure code in XML_ParserReset().
2327  */
2328 START_TEST(test_reset_in_entity) {
2329   const char *text = "<!DOCTYPE doc [\n"
2330                      "<!ENTITY wombat 'wom'>\n"
2331                      "<!ENTITY entity 'hi &wom; there'>\n"
2332                      "]>\n"
2333                      "<doc>&entity;</doc>";
2334   XML_ParsingStatus status;
2335 
2336   g_resumable = XML_TRUE;
2337   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2338   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2339       == XML_STATUS_ERROR)
2340     xml_failure(g_parser);
2341   XML_GetParsingStatus(g_parser, &status);
2342   if (status.parsing != XML_SUSPENDED)
2343     fail("Parsing status not SUSPENDED");
2344   XML_ParserReset(g_parser, NULL);
2345   XML_GetParsingStatus(g_parser, &status);
2346   if (status.parsing != XML_INITIALIZED)
2347     fail("Parsing status doesn't reset to INITIALIZED");
2348 }
2349 END_TEST
2350 
2351 /* Test that resume correctly passes through parse errors */
2352 START_TEST(test_resume_invalid_parse) {
2353   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2354 
2355   g_resumable = XML_TRUE;
2356   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2357   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2358       == XML_STATUS_ERROR)
2359     xml_failure(g_parser);
2360   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2361     fail("Resumed invalid parse not faulted");
2362   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2363     fail("Invalid parse not correctly faulted");
2364 }
2365 END_TEST
2366 
2367 /* Test that re-suspended parses are correctly passed through */
2368 START_TEST(test_resume_resuspended) {
2369   const char *text = "<doc>Hello<meep/>world</doc>";
2370 
2371   g_resumable = XML_TRUE;
2372   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2373   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2374       == XML_STATUS_ERROR)
2375     xml_failure(g_parser);
2376   g_resumable = XML_TRUE;
2377   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2378   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2379     fail("Resumption not suspended");
2380   /* This one should succeed and finish up */
2381   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2382     xml_failure(g_parser);
2383 }
2384 END_TEST
2385 
2386 /* Test that CDATA shows up correctly through a default handler */
2387 START_TEST(test_cdata_default) {
2388   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2389   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2390   CharData storage;
2391 
2392   CharData_Init(&storage);
2393   XML_SetUserData(g_parser, &storage);
2394   XML_SetDefaultHandler(g_parser, accumulate_characters);
2395 
2396   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2397       == XML_STATUS_ERROR)
2398     xml_failure(g_parser);
2399   CharData_CheckXMLChars(&storage, expected);
2400 }
2401 END_TEST
2402 
2403 /* Test resetting a subordinate parser does exactly nothing */
2404 START_TEST(test_subordinate_reset) {
2405   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2406                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2407                      "<doc>&entity;</doc>";
2408 
2409   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2410   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2411   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2412       == XML_STATUS_ERROR)
2413     xml_failure(g_parser);
2414 }
2415 END_TEST
2416 
2417 /* Test suspending a subordinate parser */
2418 START_TEST(test_subordinate_suspend) {
2419   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2420                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2421                      "<doc>&entity;</doc>";
2422 
2423   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2424   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2425   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2426       == XML_STATUS_ERROR)
2427     xml_failure(g_parser);
2428 }
2429 END_TEST
2430 
2431 /* Test suspending a subordinate parser from an XML declaration */
2432 /* Increases code coverage of the tests */
2433 
2434 START_TEST(test_subordinate_xdecl_suspend) {
2435   const char *text
2436       = "<!DOCTYPE doc [\n"
2437         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2438         "]>\n"
2439         "<doc>&entity;</doc>";
2440 
2441   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2442   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2443   g_resumable = XML_TRUE;
2444   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2445       == XML_STATUS_ERROR)
2446     xml_failure(g_parser);
2447 }
2448 END_TEST
2449 
2450 START_TEST(test_subordinate_xdecl_abort) {
2451   const char *text
2452       = "<!DOCTYPE doc [\n"
2453         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2454         "]>\n"
2455         "<doc>&entity;</doc>";
2456 
2457   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2458   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2459   g_resumable = XML_FALSE;
2460   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2461       == XML_STATUS_ERROR)
2462     xml_failure(g_parser);
2463 }
2464 END_TEST
2465 
2466 /* Test external entity fault handling with suspension */
2467 START_TEST(test_ext_entity_invalid_suspended_parse) {
2468   const char *text = "<!DOCTYPE doc [\n"
2469                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2470                      "]>\n"
2471                      "<doc>&en;</doc>";
2472   ExtFaults faults[]
2473       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2474           "Incomplete element declaration not faulted", NULL,
2475           XML_ERROR_UNCLOSED_TOKEN},
2476          {/* First two bytes of a three-byte char */
2477           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2478           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2479          {NULL, NULL, NULL, XML_ERROR_NONE}};
2480   ExtFaults *fault;
2481 
2482   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2483     set_subtest("%s", fault->parse_text);
2484     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2485     XML_SetExternalEntityRefHandler(g_parser,
2486                                     external_entity_suspending_faulter);
2487     XML_SetUserData(g_parser, fault);
2488     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2489                    "Parser did not report external entity error");
2490     XML_ParserReset(g_parser, NULL);
2491   }
2492 }
2493 END_TEST
2494 
2495 /* Test setting an explicit encoding */
2496 START_TEST(test_explicit_encoding) {
2497   const char *text1 = "<doc>Hello ";
2498   const char *text2 = " World</doc>";
2499 
2500   /* Just check that we can set the encoding to NULL before starting */
2501   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2502     fail("Failed to initialise encoding to NULL");
2503   /* Say we are UTF-8 */
2504   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2505     fail("Failed to set explicit encoding");
2506   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2507       == XML_STATUS_ERROR)
2508     xml_failure(g_parser);
2509   /* Try to switch encodings mid-parse */
2510   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2511     fail("Allowed encoding change");
2512   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2513       == XML_STATUS_ERROR)
2514     xml_failure(g_parser);
2515   /* Try now the parse is over */
2516   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2517     fail("Failed to unset encoding");
2518 }
2519 END_TEST
2520 
2521 /* Test handling of trailing CR (rather than newline) */
2522 START_TEST(test_trailing_cr) {
2523   const char *text = "<doc>\r";
2524   int found_cr;
2525 
2526   /* Try with a character handler, for code coverage */
2527   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2528   XML_SetUserData(g_parser, &found_cr);
2529   found_cr = 0;
2530   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2531       == XML_STATUS_OK)
2532     fail("Failed to fault unclosed doc");
2533   if (found_cr == 0)
2534     fail("Did not catch the carriage return");
2535   XML_ParserReset(g_parser, NULL);
2536 
2537   /* Now with a default handler instead */
2538   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2539   XML_SetUserData(g_parser, &found_cr);
2540   found_cr = 0;
2541   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2542       == XML_STATUS_OK)
2543     fail("Failed to fault unclosed doc");
2544   if (found_cr == 0)
2545     fail("Did not catch default carriage return");
2546 }
2547 END_TEST
2548 
2549 /* Test trailing CR in an external entity parse */
2550 START_TEST(test_ext_entity_trailing_cr) {
2551   const char *text = "<!DOCTYPE doc [\n"
2552                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2553                      "]>\n"
2554                      "<doc>&en;</doc>";
2555   int found_cr;
2556 
2557   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2558   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2559   XML_SetUserData(g_parser, &found_cr);
2560   found_cr = 0;
2561   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2562       != XML_STATUS_OK)
2563     xml_failure(g_parser);
2564   if (found_cr == 0)
2565     fail("No carriage return found");
2566   XML_ParserReset(g_parser, NULL);
2567 
2568   /* Try again with a different trailing CR */
2569   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2570   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2571   XML_SetUserData(g_parser, &found_cr);
2572   found_cr = 0;
2573   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2574       != XML_STATUS_OK)
2575     xml_failure(g_parser);
2576   if (found_cr == 0)
2577     fail("No carriage return found");
2578 }
2579 END_TEST
2580 
2581 /* Test handling of trailing square bracket */
2582 START_TEST(test_trailing_rsqb) {
2583   const char *text8 = "<doc>]";
2584   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2585   int found_rsqb;
2586   int text8_len = (int)strlen(text8);
2587 
2588   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2589   XML_SetUserData(g_parser, &found_rsqb);
2590   found_rsqb = 0;
2591   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2592       == XML_STATUS_OK)
2593     fail("Failed to fault unclosed doc");
2594   if (found_rsqb == 0)
2595     fail("Did not catch the right square bracket");
2596 
2597   /* Try again with a different encoding */
2598   XML_ParserReset(g_parser, NULL);
2599   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2600   XML_SetUserData(g_parser, &found_rsqb);
2601   found_rsqb = 0;
2602   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2603                               XML_TRUE)
2604       == XML_STATUS_OK)
2605     fail("Failed to fault unclosed doc");
2606   if (found_rsqb == 0)
2607     fail("Did not catch the right square bracket");
2608 
2609   /* And finally with a default handler */
2610   XML_ParserReset(g_parser, NULL);
2611   XML_SetDefaultHandler(g_parser, rsqb_handler);
2612   XML_SetUserData(g_parser, &found_rsqb);
2613   found_rsqb = 0;
2614   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2615                               XML_TRUE)
2616       == XML_STATUS_OK)
2617     fail("Failed to fault unclosed doc");
2618   if (found_rsqb == 0)
2619     fail("Did not catch the right square bracket");
2620 }
2621 END_TEST
2622 
2623 /* Test trailing right square bracket in an external entity parse */
2624 START_TEST(test_ext_entity_trailing_rsqb) {
2625   const char *text = "<!DOCTYPE doc [\n"
2626                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2627                      "]>\n"
2628                      "<doc>&en;</doc>";
2629   int found_rsqb;
2630 
2631   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2632   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2633   XML_SetUserData(g_parser, &found_rsqb);
2634   found_rsqb = 0;
2635   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2636       != XML_STATUS_OK)
2637     xml_failure(g_parser);
2638   if (found_rsqb == 0)
2639     fail("No right square bracket found");
2640 }
2641 END_TEST
2642 
2643 /* Test CDATA handling in an external entity */
2644 START_TEST(test_ext_entity_good_cdata) {
2645   const char *text = "<!DOCTYPE doc [\n"
2646                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2647                      "]>\n"
2648                      "<doc>&en;</doc>";
2649 
2650   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2651   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2652   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2653       != XML_STATUS_OK)
2654     xml_failure(g_parser);
2655 }
2656 END_TEST
2657 
2658 /* Test user parameter settings */
2659 START_TEST(test_user_parameters) {
2660   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2661                      "<!-- Primary parse -->\n"
2662                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2663                      "<doc>&entity;";
2664   const char *epilog = "<!-- Back to primary parser -->\n"
2665                        "</doc>";
2666 
2667   g_comment_count = 0;
2668   g_skip_count = 0;
2669   g_xdecl_count = 0;
2670   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2671   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2672   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2673   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2674   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2675   XML_UseParserAsHandlerArg(g_parser);
2676   XML_SetUserData(g_parser, (void *)1);
2677   g_handler_data = g_parser;
2678   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2679       == XML_STATUS_ERROR)
2680     xml_failure(g_parser);
2681   /* Ensure we can't change policy mid-parse */
2682   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2683     fail("Changed param entity parsing policy while parsing");
2684   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2685       == XML_STATUS_ERROR)
2686     xml_failure(g_parser);
2687   if (g_comment_count != 3)
2688     fail("Comment handler not invoked enough times");
2689   if (g_skip_count != 1)
2690     fail("Skip handler not invoked enough times");
2691   if (g_xdecl_count != 1)
2692     fail("XML declaration handler not invoked");
2693 }
2694 END_TEST
2695 
2696 /* Test that an explicit external entity handler argument replaces
2697  * the parser as the first argument.
2698  *
2699  * We do not call the first parameter to the external entity handler
2700  * 'parser' for once, since the first time the handler is called it
2701  * will actually be a text string.  We need to be able to access the
2702  * global 'parser' variable to create our external entity parser from,
2703  * since there are code paths we need to ensure get executed.
2704  */
2705 START_TEST(test_ext_entity_ref_parameter) {
2706   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2707                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2708                      "<doc>&entity;</doc>";
2709 
2710   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2711   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2712   /* Set a handler arg that is not NULL and not parser (which is
2713    * what NULL would cause to be passed.
2714    */
2715   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2716   g_handler_data = text;
2717   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2718       == XML_STATUS_ERROR)
2719     xml_failure(g_parser);
2720 
2721   /* Now try again with unset args */
2722   XML_ParserReset(g_parser, NULL);
2723   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2724   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2725   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2726   g_handler_data = g_parser;
2727   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2728       == XML_STATUS_ERROR)
2729     xml_failure(g_parser);
2730 }
2731 END_TEST
2732 
2733 /* Test the parsing of an empty string */
2734 START_TEST(test_empty_parse) {
2735   const char *text = "<doc></doc>";
2736   const char *partial = "<doc>";
2737 
2738   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2739     fail("Parsing empty string faulted");
2740   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2741     fail("Parsing final empty string not faulted");
2742   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2743     fail("Parsing final empty string faulted for wrong reason");
2744 
2745   /* Now try with valid text before the empty end */
2746   XML_ParserReset(g_parser, NULL);
2747   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2748       == XML_STATUS_ERROR)
2749     xml_failure(g_parser);
2750   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2751     fail("Parsing final empty string faulted");
2752 
2753   /* Now try with invalid text before the empty end */
2754   XML_ParserReset(g_parser, NULL);
2755   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2756                               XML_FALSE)
2757       == XML_STATUS_ERROR)
2758     xml_failure(g_parser);
2759   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2760     fail("Parsing final incomplete empty string not faulted");
2761 }
2762 END_TEST
2763 
2764 /* Test odd corners of the XML_GetBuffer interface */
2765 static enum XML_Status
2766 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2767   const XML_Feature *feature = XML_GetFeatureList();
2768 
2769   if (feature == NULL)
2770     return XML_STATUS_ERROR;
2771   for (; feature->feature != XML_FEATURE_END; feature++) {
2772     if (feature->feature == feature_id) {
2773       *presult = feature->value;
2774       return XML_STATUS_OK;
2775     }
2776   }
2777   return XML_STATUS_ERROR;
2778 }
2779 
2780 /* Test odd corners of the XML_GetBuffer interface */
2781 START_TEST(test_get_buffer_1) {
2782   const char *text = get_buffer_test_text;
2783   void *buffer;
2784   long context_bytes;
2785 
2786   /* Attempt to allocate a negative length buffer */
2787   if (XML_GetBuffer(g_parser, -12) != NULL)
2788     fail("Negative length buffer not failed");
2789 
2790   /* Now get a small buffer and extend it past valid length */
2791   buffer = XML_GetBuffer(g_parser, 1536);
2792   if (buffer == NULL)
2793     fail("1.5K buffer failed");
2794   assert(buffer != NULL);
2795   memcpy(buffer, text, strlen(text));
2796   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2797       == XML_STATUS_ERROR)
2798     xml_failure(g_parser);
2799   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2800     fail("INT_MAX buffer not failed");
2801 
2802   /* Now try extending it a more reasonable but still too large
2803    * amount.  The allocator in XML_GetBuffer() doubles the buffer
2804    * size until it exceeds the requested amount or INT_MAX.  If it
2805    * exceeds INT_MAX, it rejects the request, so we want a request
2806    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2807    * with an extra byte just to ensure that the request is off any
2808    * boundary.  The request will be inflated internally by
2809    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2810    * request.
2811    */
2812   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2813     context_bytes = 0;
2814   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2815     fail("INT_MAX- buffer not failed");
2816 
2817   /* Now try extending it a carefully crafted amount */
2818   if (XML_GetBuffer(g_parser, 1000) == NULL)
2819     fail("1000 buffer failed");
2820 }
2821 END_TEST
2822 
2823 /* Test more corners of the XML_GetBuffer interface */
2824 START_TEST(test_get_buffer_2) {
2825   const char *text = get_buffer_test_text;
2826   void *buffer;
2827 
2828   /* Now get a decent buffer */
2829   buffer = XML_GetBuffer(g_parser, 1536);
2830   if (buffer == NULL)
2831     fail("1.5K buffer failed");
2832   assert(buffer != NULL);
2833   memcpy(buffer, text, strlen(text));
2834   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2835       == XML_STATUS_ERROR)
2836     xml_failure(g_parser);
2837 
2838   /* Extend it, to catch a different code path */
2839   if (XML_GetBuffer(g_parser, 1024) == NULL)
2840     fail("1024 buffer failed");
2841 }
2842 END_TEST
2843 
2844 /* Test for signed integer overflow CVE-2022-23852 */
2845 #if XML_CONTEXT_BYTES > 0
2846 START_TEST(test_get_buffer_3_overflow) {
2847   XML_Parser parser = XML_ParserCreate(NULL);
2848   assert(parser != NULL);
2849 
2850   const char *const text = "\n";
2851   const int expectedKeepValue = (int)strlen(text);
2852 
2853   // After this call, variable "keep" in XML_GetBuffer will
2854   // have value expectedKeepValue
2855   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
2856                               XML_FALSE /* isFinal */)
2857       == XML_STATUS_ERROR)
2858     xml_failure(parser);
2859 
2860   assert(expectedKeepValue > 0);
2861   if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
2862     fail("enlarging buffer not failed");
2863 
2864   XML_ParserFree(parser);
2865 }
2866 END_TEST
2867 #endif // XML_CONTEXT_BYTES > 0
2868 
2869 START_TEST(test_buffer_can_grow_to_max) {
2870   const char *const prefixes[] = {
2871       "",
2872       "<",
2873       "<x a='",
2874       "<doc><x a='",
2875       "<document><x a='",
2876       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2877       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2878       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2879       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2880       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2881   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2882   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2883 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2884   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2885   // Can we make a big allocation?
2886   void *big = malloc(maxbuf);
2887   if (! big) {
2888     // The big allocation failed. Let's be a little lenient.
2889     maxbuf = maxbuf / 2;
2890   }
2891   free(big);
2892 #endif
2893 
2894   for (int i = 0; i < num_prefixes; ++i) {
2895     set_subtest("\"%s\"", prefixes[i]);
2896     XML_Parser parser = XML_ParserCreate(NULL);
2897     const int prefix_len = (int)strlen(prefixes[i]);
2898     const enum XML_Status s
2899         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2900     if (s != XML_STATUS_OK)
2901       xml_failure(parser);
2902 
2903     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
2904     // subtracting the whole prefix is easiest, and close enough.
2905     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
2906     // The limit should be consistent; no prefix should allow us to
2907     // reach above the max buffer size.
2908     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
2909     XML_ParserFree(parser);
2910   }
2911 }
2912 END_TEST
2913 
2914 START_TEST(test_getbuffer_allocates_on_zero_len) {
2915   for (int first_len = 1; first_len >= 0; first_len--) {
2916     set_subtest("with len=%d first", first_len);
2917     XML_Parser parser = XML_ParserCreate(NULL);
2918     assert_true(parser != NULL);
2919     assert_true(XML_GetBuffer(parser, first_len) != NULL);
2920     assert_true(XML_GetBuffer(parser, 0) != NULL);
2921     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
2922       xml_failure(parser);
2923     XML_ParserFree(parser);
2924   }
2925 }
2926 END_TEST
2927 
2928 /* Test position information macros */
2929 START_TEST(test_byte_info_at_end) {
2930   const char *text = "<doc></doc>";
2931 
2932   if (XML_GetCurrentByteIndex(g_parser) != -1
2933       || XML_GetCurrentByteCount(g_parser) != 0)
2934     fail("Byte index/count incorrect at start of parse");
2935   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2936       == XML_STATUS_ERROR)
2937     xml_failure(g_parser);
2938   /* At end, the count will be zero and the index the end of string */
2939   if (XML_GetCurrentByteCount(g_parser) != 0)
2940     fail("Terminal byte count incorrect");
2941   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
2942     fail("Terminal byte index incorrect");
2943 }
2944 END_TEST
2945 
2946 /* Test position information from errors */
2947 #define PRE_ERROR_STR "<doc></"
2948 #define POST_ERROR_STR "wombat></doc>"
2949 START_TEST(test_byte_info_at_error) {
2950   const char *text = PRE_ERROR_STR POST_ERROR_STR;
2951 
2952   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2953       == XML_STATUS_OK)
2954     fail("Syntax error not faulted");
2955   if (XML_GetCurrentByteCount(g_parser) != 0)
2956     fail("Error byte count incorrect");
2957   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
2958     fail("Error byte index incorrect");
2959 }
2960 END_TEST
2961 #undef PRE_ERROR_STR
2962 #undef POST_ERROR_STR
2963 
2964 /* Test position information in handler */
2965 #define START_ELEMENT "<e>"
2966 #define CDATA_TEXT "Hello"
2967 #define END_ELEMENT "</e>"
2968 START_TEST(test_byte_info_at_cdata) {
2969   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
2970   int offset, size;
2971   ByteTestData data;
2972 
2973   /* Check initial context is empty */
2974   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
2975     fail("Unexpected context at start of parse");
2976 
2977   data.start_element_len = (int)strlen(START_ELEMENT);
2978   data.cdata_len = (int)strlen(CDATA_TEXT);
2979   data.total_string_len = (int)strlen(text);
2980   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
2981   XML_SetUserData(g_parser, &data);
2982   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
2983     xml_failure(g_parser);
2984 }
2985 END_TEST
2986 #undef START_ELEMENT
2987 #undef CDATA_TEXT
2988 #undef END_ELEMENT
2989 
2990 /* Test predefined entities are correctly recognised */
2991 START_TEST(test_predefined_entities) {
2992   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
2993   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
2994   const XML_Char *result = XCS("<>&\"'");
2995   CharData storage;
2996 
2997   XML_SetDefaultHandler(g_parser, accumulate_characters);
2998   /* run_character_check uses XML_SetCharacterDataHandler(), which
2999    * unfortunately heads off a code path that we need to exercise.
3000    */
3001   CharData_Init(&storage);
3002   XML_SetUserData(g_parser, &storage);
3003   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3004       == XML_STATUS_ERROR)
3005     xml_failure(g_parser);
3006   /* The default handler doesn't translate the entities */
3007   CharData_CheckXMLChars(&storage, expected);
3008 
3009   /* Now try again and check the translation */
3010   XML_ParserReset(g_parser, NULL);
3011   run_character_check(text, result);
3012 }
3013 END_TEST
3014 
3015 /* Regression test that an invalid tag in an external parameter
3016  * reference in an external DTD is correctly faulted.
3017  *
3018  * Only a few specific tags are legal in DTDs ignoring comments and
3019  * processing instructions, all of which begin with an exclamation
3020  * mark.  "<el/>" is not one of them, so the parser should raise an
3021  * error on encountering it.
3022  */
3023 START_TEST(test_invalid_tag_in_dtd) {
3024   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3025                      "<doc></doc>\n";
3026 
3027   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3028   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3029   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3030                  "Invalid tag IN DTD external param not rejected");
3031 }
3032 END_TEST
3033 
3034 /* Test entities not quite the predefined ones are not mis-recognised */
3035 START_TEST(test_not_predefined_entities) {
3036   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3037                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3038   int i = 0;
3039 
3040   while (text[i] != NULL) {
3041     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3042                    "Undefined entity not rejected");
3043     XML_ParserReset(g_parser, NULL);
3044     i++;
3045   }
3046 }
3047 END_TEST
3048 
3049 /* Test conditional inclusion (IGNORE) */
3050 START_TEST(test_ignore_section) {
3051   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3052                      "<doc><e>&entity;</e></doc>";
3053   const XML_Char *expected
3054       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3055   CharData storage;
3056 
3057   CharData_Init(&storage);
3058   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3059   XML_SetUserData(g_parser, &storage);
3060   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3061   XML_SetDefaultHandler(g_parser, accumulate_characters);
3062   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3063   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3064   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3065   XML_SetStartElementHandler(g_parser, dummy_start_element);
3066   XML_SetEndElementHandler(g_parser, dummy_end_element);
3067   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3068       == XML_STATUS_ERROR)
3069     xml_failure(g_parser);
3070   CharData_CheckXMLChars(&storage, expected);
3071 }
3072 END_TEST
3073 
3074 START_TEST(test_ignore_section_utf16) {
3075   const char text[] =
3076       /* <!DOCTYPE d SYSTEM 's'> */
3077       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3078       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3079       /* <d><e>&en;</e></d> */
3080       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3081   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3082   CharData storage;
3083 
3084   CharData_Init(&storage);
3085   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3086   XML_SetUserData(g_parser, &storage);
3087   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3088   XML_SetDefaultHandler(g_parser, accumulate_characters);
3089   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3090   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3091   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3092   XML_SetStartElementHandler(g_parser, dummy_start_element);
3093   XML_SetEndElementHandler(g_parser, dummy_end_element);
3094   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3095       == XML_STATUS_ERROR)
3096     xml_failure(g_parser);
3097   CharData_CheckXMLChars(&storage, expected);
3098 }
3099 END_TEST
3100 
3101 START_TEST(test_ignore_section_utf16_be) {
3102   const char text[] =
3103       /* <!DOCTYPE d SYSTEM 's'> */
3104       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3105       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3106       /* <d><e>&en;</e></d> */
3107       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3108   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3109   CharData storage;
3110 
3111   CharData_Init(&storage);
3112   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3113   XML_SetUserData(g_parser, &storage);
3114   XML_SetExternalEntityRefHandler(g_parser,
3115                                   external_entity_load_ignore_utf16_be);
3116   XML_SetDefaultHandler(g_parser, accumulate_characters);
3117   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3118   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3119   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3120   XML_SetStartElementHandler(g_parser, dummy_start_element);
3121   XML_SetEndElementHandler(g_parser, dummy_end_element);
3122   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3123       == XML_STATUS_ERROR)
3124     xml_failure(g_parser);
3125   CharData_CheckXMLChars(&storage, expected);
3126 }
3127 END_TEST
3128 
3129 /* Test mis-formatted conditional exclusion */
3130 START_TEST(test_bad_ignore_section) {
3131   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3132                      "<doc><e>&entity;</e></doc>";
3133   ExtFaults faults[]
3134       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3135           XML_ERROR_SYNTAX},
3136          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3137           XML_ERROR_INVALID_TOKEN},
3138          {/* FIrst two bytes of a three-byte char */
3139           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3140           XML_ERROR_PARTIAL_CHAR},
3141          {NULL, NULL, NULL, XML_ERROR_NONE}};
3142   ExtFaults *fault;
3143 
3144   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3145     set_subtest("%s", fault->parse_text);
3146     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3147     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3148     XML_SetUserData(g_parser, fault);
3149     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3150                    "Incomplete IGNORE section not failed");
3151     XML_ParserReset(g_parser, NULL);
3152   }
3153 }
3154 END_TEST
3155 
3156 struct bom_testdata {
3157   const char *external;
3158   int split;
3159   XML_Bool nested_callback_happened;
3160 };
3161 
3162 static int XMLCALL
3163 external_bom_checker(XML_Parser parser, const XML_Char *context,
3164                      const XML_Char *base, const XML_Char *systemId,
3165                      const XML_Char *publicId) {
3166   const char *text;
3167   UNUSED_P(base);
3168   UNUSED_P(systemId);
3169   UNUSED_P(publicId);
3170 
3171   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3172   if (ext_parser == NULL)
3173     fail("Could not create external entity parser");
3174 
3175   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3176     struct bom_testdata *const testdata
3177         = (struct bom_testdata *)XML_GetUserData(parser);
3178     const char *const external = testdata->external;
3179     const int split = testdata->split;
3180     testdata->nested_callback_happened = XML_TRUE;
3181 
3182     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3183         != XML_STATUS_OK) {
3184       xml_failure(ext_parser);
3185     }
3186     text = external + split; // the parse below will continue where we left off.
3187   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3188     text = "<!ELEMENT doc EMPTY>\n"
3189            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3190            "<!ENTITY % e2 '%e1;'>\n";
3191   } else {
3192     fail("unknown systemId");
3193   }
3194 
3195   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3196       != XML_STATUS_OK)
3197     xml_failure(ext_parser);
3198 
3199   XML_ParserFree(ext_parser);
3200   return XML_STATUS_OK;
3201 }
3202 
3203 /* regression test: BOM should be consumed when followed by a partial token. */
3204 START_TEST(test_external_bom_consumed) {
3205   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3206                            "<doc></doc>\n";
3207   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3208   const int len = (int)strlen(external);
3209   for (int split = 0; split <= len; ++split) {
3210     set_subtest("split at byte %d", split);
3211 
3212     struct bom_testdata testdata;
3213     testdata.external = external;
3214     testdata.split = split;
3215     testdata.nested_callback_happened = XML_FALSE;
3216 
3217     XML_Parser parser = XML_ParserCreate(NULL);
3218     if (parser == NULL) {
3219       fail("Couldn't create parser");
3220     }
3221     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3222     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3223     XML_SetUserData(parser, &testdata);
3224     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3225         == XML_STATUS_ERROR)
3226       xml_failure(parser);
3227     if (! testdata.nested_callback_happened) {
3228       fail("ref handler not called");
3229     }
3230     XML_ParserFree(parser);
3231   }
3232 }
3233 END_TEST
3234 
3235 /* Test recursive parsing */
3236 START_TEST(test_external_entity_values) {
3237   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3238                      "<doc></doc>\n";
3239   ExtFaults data_004_2[] = {
3240       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3241       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3242        XML_ERROR_INVALID_TOKEN},
3243       {"'wombat", "Unterminated string not faulted", NULL,
3244        XML_ERROR_UNCLOSED_TOKEN},
3245       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3246        XML_ERROR_PARTIAL_CHAR},
3247       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3248       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3249        XML_ERROR_XML_DECL},
3250       {/* UTF-8 BOM */
3251        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3252        XML_ERROR_NONE},
3253       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3254        "Invalid token after text declaration not faulted", NULL,
3255        XML_ERROR_INVALID_TOKEN},
3256       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3257        "Unterminated string after text decl not faulted", NULL,
3258        XML_ERROR_UNCLOSED_TOKEN},
3259       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3260        "Partial UTF-8 character after text decl not faulted", NULL,
3261        XML_ERROR_PARTIAL_CHAR},
3262       {"%e1;", "Recursive parameter entity not faulted", NULL,
3263        XML_ERROR_RECURSIVE_ENTITY_REF},
3264       {NULL, NULL, NULL, XML_ERROR_NONE}};
3265   int i;
3266 
3267   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3268     set_subtest("%s", data_004_2[i].parse_text);
3269     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3270     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3271     XML_SetUserData(g_parser, &data_004_2[i]);
3272     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3273         == XML_STATUS_ERROR)
3274       xml_failure(g_parser);
3275     XML_ParserReset(g_parser, NULL);
3276   }
3277 }
3278 END_TEST
3279 
3280 /* Test the recursive parse interacts with a not standalone handler */
3281 START_TEST(test_ext_entity_not_standalone) {
3282   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3283                      "<doc></doc>";
3284 
3285   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3286   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3287   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3288                  "Standalone rejection not caught");
3289 }
3290 END_TEST
3291 
3292 START_TEST(test_ext_entity_value_abort) {
3293   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3294                      "<doc></doc>\n";
3295 
3296   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3297   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3298   g_resumable = XML_FALSE;
3299   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3300       == XML_STATUS_ERROR)
3301     xml_failure(g_parser);
3302 }
3303 END_TEST
3304 
3305 START_TEST(test_bad_public_doctype) {
3306   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3307                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3308                      "<doc></doc>";
3309 
3310   /* Setting a handler provokes a particular code path */
3311   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3312                             dummy_end_doctype_handler);
3313   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3314 }
3315 END_TEST
3316 
3317 /* Test based on ibm/valid/P32/ibm32v04.xml */
3318 START_TEST(test_attribute_enum_value) {
3319   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3320                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3321                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3322   ExtTest dtd_data
3323       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3324          "<!ELEMENT a EMPTY>\n"
3325          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3326          NULL, NULL};
3327   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3328 
3329   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3330   XML_SetUserData(g_parser, &dtd_data);
3331   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3332   /* An attribute list handler provokes a different code path */
3333   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3334   run_ext_character_check(text, &dtd_data, expected);
3335 }
3336 END_TEST
3337 
3338 /* Slightly bizarrely, the library seems to silently ignore entity
3339  * definitions for predefined entities, even when they are wrong.  The
3340  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3341  * to happen, so this is currently treated as acceptable.
3342  */
3343 START_TEST(test_predefined_entity_redefinition) {
3344   const char *text = "<!DOCTYPE doc [\n"
3345                      "<!ENTITY apos 'foo'>\n"
3346                      "]>\n"
3347                      "<doc>&apos;</doc>";
3348   run_character_check(text, XCS("'"));
3349 }
3350 END_TEST
3351 
3352 /* Test that the parser stops processing the DTD after an unresolved
3353  * parameter entity is encountered.
3354  */
3355 START_TEST(test_dtd_stop_processing) {
3356   const char *text = "<!DOCTYPE doc [\n"
3357                      "%foo;\n"
3358                      "<!ENTITY bar 'bas'>\n"
3359                      "]><doc/>";
3360 
3361   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3362   init_dummy_handlers();
3363   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3364       == XML_STATUS_ERROR)
3365     xml_failure(g_parser);
3366   if (get_dummy_handler_flags() != 0)
3367     fail("DTD processing still going after undefined PE");
3368 }
3369 END_TEST
3370 
3371 /* Test public notations with no system ID */
3372 START_TEST(test_public_notation_no_sysid) {
3373   const char *text = "<!DOCTYPE doc [\n"
3374                      "<!NOTATION note PUBLIC 'foo'>\n"
3375                      "<!ELEMENT doc EMPTY>\n"
3376                      "]>\n<doc/>";
3377 
3378   init_dummy_handlers();
3379   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3380   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3381       == XML_STATUS_ERROR)
3382     xml_failure(g_parser);
3383   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3384     fail("Notation declaration handler not called");
3385 }
3386 END_TEST
3387 
3388 START_TEST(test_nested_groups) {
3389   const char *text
3390       = "<!DOCTYPE doc [\n"
3391         "<!ELEMENT doc "
3392         /* Sixteen elements per line */
3393         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3394         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3395         "))))))))))))))))))))))))))))))))>\n"
3396         "<!ELEMENT e EMPTY>"
3397         "]>\n"
3398         "<doc><e/></doc>";
3399   CharData storage;
3400 
3401   CharData_Init(&storage);
3402   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3403   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3404   XML_SetUserData(g_parser, &storage);
3405   init_dummy_handlers();
3406   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407       == XML_STATUS_ERROR)
3408     xml_failure(g_parser);
3409   CharData_CheckXMLChars(&storage, XCS("doce"));
3410   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3411     fail("Element handler not fired");
3412 }
3413 END_TEST
3414 
3415 START_TEST(test_group_choice) {
3416   const char *text = "<!DOCTYPE doc [\n"
3417                      "<!ELEMENT doc (a|b|c)+>\n"
3418                      "<!ELEMENT a EMPTY>\n"
3419                      "<!ELEMENT b (#PCDATA)>\n"
3420                      "<!ELEMENT c ANY>\n"
3421                      "]>\n"
3422                      "<doc>\n"
3423                      "<a/>\n"
3424                      "<b attr='foo'>This is a foo</b>\n"
3425                      "<c></c>\n"
3426                      "</doc>\n";
3427 
3428   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3429   init_dummy_handlers();
3430   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3431       == XML_STATUS_ERROR)
3432     xml_failure(g_parser);
3433   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3434     fail("Element handler flag not raised");
3435 }
3436 END_TEST
3437 
3438 START_TEST(test_standalone_parameter_entity) {
3439   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3440                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3441                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3442                      "%entity;\n"
3443                      "]>\n"
3444                      "<doc></doc>";
3445   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3446 
3447   XML_SetUserData(g_parser, dtd_data);
3448   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3449   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3450   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3451       == XML_STATUS_ERROR)
3452     xml_failure(g_parser);
3453 }
3454 END_TEST
3455 
3456 /* Test skipping of parameter entity in an external DTD */
3457 /* Derived from ibm/invalid/P69/ibm69i01.xml */
3458 START_TEST(test_skipped_parameter_entity) {
3459   const char *text = "<?xml version='1.0'?>\n"
3460                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3461                      "<!ELEMENT root (#PCDATA|a)* >\n"
3462                      "]>\n"
3463                      "<root></root>";
3464   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3465 
3466   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3467   XML_SetUserData(g_parser, &dtd_data);
3468   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3469   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3470   init_dummy_handlers();
3471   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3472       == XML_STATUS_ERROR)
3473     xml_failure(g_parser);
3474   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3475     fail("Skip handler not executed");
3476 }
3477 END_TEST
3478 
3479 /* Test recursive parameter entity definition rejected in external DTD */
3480 START_TEST(test_recursive_external_parameter_entity) {
3481   const char *text = "<?xml version='1.0'?>\n"
3482                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3483                      "<!ELEMENT root (#PCDATA|a)* >\n"
3484                      "]>\n"
3485                      "<root></root>";
3486   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3487                         "Recursive external parameter entity not faulted", NULL,
3488                         XML_ERROR_RECURSIVE_ENTITY_REF};
3489 
3490   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3491   XML_SetUserData(g_parser, &dtd_data);
3492   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3493   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3494                  "Recursive external parameter not spotted");
3495 }
3496 END_TEST
3497 
3498 /* Test undefined parameter entity in external entity handler */
3499 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3500   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3501                      "<doc></doc>\n";
3502 
3503   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3504   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3505   XML_SetUserData(g_parser, NULL);
3506   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3507       == XML_STATUS_ERROR)
3508     xml_failure(g_parser);
3509 
3510   /* Now repeat without the external entity ref handler invoking
3511    * another copy of itself.
3512    */
3513   XML_ParserReset(g_parser, NULL);
3514   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3515   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3516   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3517   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3518       == XML_STATUS_ERROR)
3519     xml_failure(g_parser);
3520 }
3521 END_TEST
3522 
3523 /* Test suspending the parse on receiving an XML declaration works */
3524 START_TEST(test_suspend_xdecl) {
3525   const char *text = long_character_data_text;
3526 
3527   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3528   XML_SetUserData(g_parser, g_parser);
3529   g_resumable = XML_TRUE;
3530   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3531       != XML_STATUS_SUSPENDED)
3532     xml_failure(g_parser);
3533   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3534     xml_failure(g_parser);
3535   /* Attempt to start a new parse while suspended */
3536   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3537       != XML_STATUS_ERROR)
3538     fail("Attempt to parse while suspended not faulted");
3539   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3540     fail("Suspended parse not faulted with correct error");
3541 }
3542 END_TEST
3543 
3544 /* Test aborting the parse in an epilog works */
3545 START_TEST(test_abort_epilog) {
3546   const char *text = "<doc></doc>\n\r\n";
3547   XML_Char trigger_char = XCS('\r');
3548 
3549   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3550   XML_SetUserData(g_parser, &trigger_char);
3551   g_resumable = XML_FALSE;
3552   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3553       != XML_STATUS_ERROR)
3554     fail("Abort not triggered");
3555   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3556     xml_failure(g_parser);
3557 }
3558 END_TEST
3559 
3560 /* Test a different code path for abort in the epilog */
3561 START_TEST(test_abort_epilog_2) {
3562   const char *text = "<doc></doc>\n";
3563   XML_Char trigger_char = XCS('\n');
3564 
3565   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3566   XML_SetUserData(g_parser, &trigger_char);
3567   g_resumable = XML_FALSE;
3568   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3569 }
3570 END_TEST
3571 
3572 /* Test suspension from the epilog */
3573 START_TEST(test_suspend_epilog) {
3574   const char *text = "<doc></doc>\n";
3575   XML_Char trigger_char = XCS('\n');
3576 
3577   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3578   XML_SetUserData(g_parser, &trigger_char);
3579   g_resumable = XML_TRUE;
3580   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3581       != XML_STATUS_SUSPENDED)
3582     xml_failure(g_parser);
3583 }
3584 END_TEST
3585 
3586 START_TEST(test_suspend_in_sole_empty_tag) {
3587   const char *text = "<doc/>";
3588   enum XML_Status rc;
3589 
3590   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3591   XML_SetUserData(g_parser, g_parser);
3592   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3593   if (rc == XML_STATUS_ERROR)
3594     xml_failure(g_parser);
3595   else if (rc != XML_STATUS_SUSPENDED)
3596     fail("Suspend not triggered");
3597   rc = XML_ResumeParser(g_parser);
3598   if (rc == XML_STATUS_ERROR)
3599     xml_failure(g_parser);
3600   else if (rc != XML_STATUS_OK)
3601     fail("Resume failed");
3602 }
3603 END_TEST
3604 
3605 START_TEST(test_unfinished_epilog) {
3606   const char *text = "<doc></doc><";
3607 
3608   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3609                  "Incomplete epilog entry not faulted");
3610 }
3611 END_TEST
3612 
3613 START_TEST(test_partial_char_in_epilog) {
3614   const char *text = "<doc></doc>\xe2\x82";
3615 
3616   /* First check that no fault is raised if the parse is not finished */
3617   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3618       == XML_STATUS_ERROR)
3619     xml_failure(g_parser);
3620   /* Now check that it is faulted once we finish */
3621   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3622     fail("Partial character in epilog not faulted");
3623   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3624     xml_failure(g_parser);
3625 }
3626 END_TEST
3627 
3628 /* Test resuming a parse suspended in entity substitution */
3629 START_TEST(test_suspend_resume_internal_entity) {
3630   const char *text
3631       = "<!DOCTYPE doc [\n"
3632         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3633         "]>\n"
3634         "<doc>&foo;</doc>\n";
3635   const XML_Char *expected1 = XCS("Hi");
3636   const XML_Char *expected2 = XCS("HiHo");
3637   CharData storage;
3638 
3639   CharData_Init(&storage);
3640   XML_SetStartElementHandler(g_parser, start_element_suspender);
3641   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3642   XML_SetUserData(g_parser, &storage);
3643   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3644   // we won't know exactly how much input we actually managed to give Expat.
3645   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3646       != XML_STATUS_SUSPENDED)
3647     xml_failure(g_parser);
3648   CharData_CheckXMLChars(&storage, XCS(""));
3649   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3650     xml_failure(g_parser);
3651   CharData_CheckXMLChars(&storage, expected1);
3652   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3653     xml_failure(g_parser);
3654   CharData_CheckXMLChars(&storage, expected2);
3655 }
3656 END_TEST
3657 
3658 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3659   const char *const text
3660       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3661         "<"
3662         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3663         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3664         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3665         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3666         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3667         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3668         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3669         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3670         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3671         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3672         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3673         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3674         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3675         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3676         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3677         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3678         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3679         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3680         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3681         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3682         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3683         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3684         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3685         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3686         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3687         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3688         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3689         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3690         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3691         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3692         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3693         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3694         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3695         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3696         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3697         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3698         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3699         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3700         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3701         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3702         "/>"
3703         "</b></a>";
3704   const size_t firstChunkSizeBytes = 54;
3705 
3706   XML_Parser parser = XML_ParserCreate(NULL);
3707   XML_SetUserData(parser, parser);
3708   XML_SetCommentHandler(parser, suspending_comment_handler);
3709 
3710   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3711       != XML_STATUS_SUSPENDED)
3712     xml_failure(parser);
3713   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3714     xml_failure(parser);
3715   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3716                               (int)(strlen(text) - firstChunkSizeBytes),
3717                               XML_TRUE)
3718       != XML_STATUS_OK)
3719     xml_failure(parser);
3720   XML_ParserFree(parser);
3721 }
3722 END_TEST
3723 
3724 /* Test syntax error is caught at parse resumption */
3725 START_TEST(test_resume_entity_with_syntax_error) {
3726   const char *text = "<!DOCTYPE doc [\n"
3727                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3728                      "]>\n"
3729                      "<doc>&foo;</doc>\n";
3730 
3731   XML_SetStartElementHandler(g_parser, start_element_suspender);
3732   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3733       != XML_STATUS_SUSPENDED)
3734     xml_failure(g_parser);
3735   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3736     fail("Syntax error in entity not faulted");
3737   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3738     xml_failure(g_parser);
3739 }
3740 END_TEST
3741 
3742 /* Test suspending and resuming in a parameter entity substitution */
3743 START_TEST(test_suspend_resume_parameter_entity) {
3744   const char *text = "<!DOCTYPE doc [\n"
3745                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3746                      "%foo;\n"
3747                      "]>\n"
3748                      "<doc>Hello, world</doc>";
3749   const XML_Char *expected = XCS("Hello, world");
3750   CharData storage;
3751 
3752   CharData_Init(&storage);
3753   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3754   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3755   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3756   XML_SetUserData(g_parser, &storage);
3757   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3758       != XML_STATUS_SUSPENDED)
3759     xml_failure(g_parser);
3760   CharData_CheckXMLChars(&storage, XCS(""));
3761   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3762     xml_failure(g_parser);
3763   CharData_CheckXMLChars(&storage, expected);
3764 }
3765 END_TEST
3766 
3767 /* Test attempting to use parser after an error is faulted */
3768 START_TEST(test_restart_on_error) {
3769   const char *text = "<$doc><doc></doc>";
3770 
3771   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3772       != XML_STATUS_ERROR)
3773     fail("Invalid tag name not faulted");
3774   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3775     xml_failure(g_parser);
3776   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3777     fail("Restarting invalid parse not faulted");
3778   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3779     xml_failure(g_parser);
3780 }
3781 END_TEST
3782 
3783 /* Test that angle brackets in an attribute default value are faulted */
3784 START_TEST(test_reject_lt_in_attribute_value) {
3785   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3786                      "<doc></doc>";
3787 
3788   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3789                  "Bad attribute default not faulted");
3790 }
3791 END_TEST
3792 
3793 START_TEST(test_reject_unfinished_param_in_att_value) {
3794   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3795                      "<doc></doc>";
3796 
3797   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3798                  "Bad attribute default not faulted");
3799 }
3800 END_TEST
3801 
3802 START_TEST(test_trailing_cr_in_att_value) {
3803   const char *text = "<doc a='value\r'/>";
3804 
3805   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3806       == XML_STATUS_ERROR)
3807     xml_failure(g_parser);
3808 }
3809 END_TEST
3810 
3811 /* Try parsing a general entity within a parameter entity in a
3812  * standalone internal DTD.  Covers a corner case in the parser.
3813  */
3814 START_TEST(test_standalone_internal_entity) {
3815   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3816                      "<!DOCTYPE doc [\n"
3817                      "  <!ELEMENT doc (#PCDATA)>\n"
3818                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3819                      "  <!ENTITY ge 'AttDefaultValue'>\n"
3820                      "  %pe;\n"
3821                      "]>\n"
3822                      "<doc att2='any'/>";
3823 
3824   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3825   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3826       == XML_STATUS_ERROR)
3827     xml_failure(g_parser);
3828 }
3829 END_TEST
3830 
3831 /* Test that a reference to an unknown external entity is skipped */
3832 START_TEST(test_skipped_external_entity) {
3833   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3834                      "<doc></doc>\n";
3835   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3836                        "<!ENTITY % e2 '%e1;'>\n",
3837                        NULL, NULL};
3838 
3839   XML_SetUserData(g_parser, &test_data);
3840   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3841   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3842   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3843       == XML_STATUS_ERROR)
3844     xml_failure(g_parser);
3845 }
3846 END_TEST
3847 
3848 /* Test a different form of unknown external entity */
3849 START_TEST(test_skipped_null_loaded_ext_entity) {
3850   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3851                      "<doc />";
3852   ExtHdlrData test_data
3853       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3854          "<!ENTITY % pe2 '%pe1;'>\n"
3855          "%pe2;\n",
3856          external_entity_null_loader};
3857 
3858   XML_SetUserData(g_parser, &test_data);
3859   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3860   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3861   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3862       == XML_STATUS_ERROR)
3863     xml_failure(g_parser);
3864 }
3865 END_TEST
3866 
3867 START_TEST(test_skipped_unloaded_ext_entity) {
3868   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3869                      "<doc />";
3870   ExtHdlrData test_data
3871       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3872          "<!ENTITY % pe2 '%pe1;'>\n"
3873          "%pe2;\n",
3874          NULL};
3875 
3876   XML_SetUserData(g_parser, &test_data);
3877   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3878   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3879   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3880       == XML_STATUS_ERROR)
3881     xml_failure(g_parser);
3882 }
3883 END_TEST
3884 
3885 /* Test that a parameter entity value ending with a carriage return
3886  * has it translated internally into a newline.
3887  */
3888 START_TEST(test_param_entity_with_trailing_cr) {
3889 #define PARAM_ENTITY_NAME "pe"
3890 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3891   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3892                      "<doc/>";
3893   ExtTest test_data
3894       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3895          "%" PARAM_ENTITY_NAME ";\n",
3896          NULL, NULL};
3897 
3898   XML_SetUserData(g_parser, &test_data);
3899   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3900   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3901   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
3902   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
3903                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
3904   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3905       == XML_STATUS_ERROR)
3906     xml_failure(g_parser);
3907   int entity_match_flag = get_param_entity_match_flag();
3908   if (entity_match_flag == ENTITY_MATCH_FAIL)
3909     fail("Parameter entity CR->NEWLINE conversion failed");
3910   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
3911     fail("Parameter entity not parsed");
3912 }
3913 #undef PARAM_ENTITY_NAME
3914 #undef PARAM_ENTITY_CORE_VALUE
3915 END_TEST
3916 
3917 START_TEST(test_invalid_character_entity) {
3918   const char *text = "<!DOCTYPE doc [\n"
3919                      "  <!ENTITY entity '&#x110000;'>\n"
3920                      "]>\n"
3921                      "<doc>&entity;</doc>";
3922 
3923   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3924                  "Out of range character reference not faulted");
3925 }
3926 END_TEST
3927 
3928 START_TEST(test_invalid_character_entity_2) {
3929   const char *text = "<!DOCTYPE doc [\n"
3930                      "  <!ENTITY entity '&#xg0;'>\n"
3931                      "]>\n"
3932                      "<doc>&entity;</doc>";
3933 
3934   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3935                  "Out of range character reference not faulted");
3936 }
3937 END_TEST
3938 
3939 START_TEST(test_invalid_character_entity_3) {
3940   const char text[] =
3941       /* <!DOCTYPE doc [\n */
3942       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
3943       /* U+0E04 = KHO KHWAI
3944        * U+0E08 = CHO CHAN */
3945       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
3946       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
3947       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
3948       /* ]>\n */
3949       "\0]\0>\0\n"
3950       /* <doc>&entity;</doc> */
3951       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
3952 
3953   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3954       != XML_STATUS_ERROR)
3955     fail("Invalid start of entity name not faulted");
3956   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
3957     xml_failure(g_parser);
3958 }
3959 END_TEST
3960 
3961 START_TEST(test_invalid_character_entity_4) {
3962   const char *text = "<!DOCTYPE doc [\n"
3963                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
3964                      "]>\n"
3965                      "<doc>&entity;</doc>";
3966 
3967   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3968                  "Out of range character reference not faulted");
3969 }
3970 END_TEST
3971 
3972 /* Test that processing instructions are picked up by a default handler */
3973 START_TEST(test_pi_handled_in_default) {
3974   const char *text = "<?test processing instruction?>\n<doc/>";
3975   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
3976   CharData storage;
3977 
3978   CharData_Init(&storage);
3979   XML_SetDefaultHandler(g_parser, accumulate_characters);
3980   XML_SetUserData(g_parser, &storage);
3981   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3982       == XML_STATUS_ERROR)
3983     xml_failure(g_parser);
3984   CharData_CheckXMLChars(&storage, expected);
3985 }
3986 END_TEST
3987 
3988 /* Test that comments are picked up by a default handler */
3989 START_TEST(test_comment_handled_in_default) {
3990   const char *text = "<!-- This is a comment -->\n<doc/>";
3991   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
3992   CharData storage;
3993 
3994   CharData_Init(&storage);
3995   XML_SetDefaultHandler(g_parser, accumulate_characters);
3996   XML_SetUserData(g_parser, &storage);
3997   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3998       == XML_STATUS_ERROR)
3999     xml_failure(g_parser);
4000   CharData_CheckXMLChars(&storage, expected);
4001 }
4002 END_TEST
4003 
4004 /* Test PIs that look almost but not quite like XML declarations */
4005 START_TEST(test_pi_yml) {
4006   const char *text = "<?yml something like data?><doc/>";
4007   const XML_Char *expected = XCS("yml: something like data\n");
4008   CharData storage;
4009 
4010   CharData_Init(&storage);
4011   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4012   XML_SetUserData(g_parser, &storage);
4013   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4014       == XML_STATUS_ERROR)
4015     xml_failure(g_parser);
4016   CharData_CheckXMLChars(&storage, expected);
4017 }
4018 END_TEST
4019 
4020 START_TEST(test_pi_xnl) {
4021   const char *text = "<?xnl nothing like data?><doc/>";
4022   const XML_Char *expected = XCS("xnl: nothing like data\n");
4023   CharData storage;
4024 
4025   CharData_Init(&storage);
4026   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4027   XML_SetUserData(g_parser, &storage);
4028   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4029       == XML_STATUS_ERROR)
4030     xml_failure(g_parser);
4031   CharData_CheckXMLChars(&storage, expected);
4032 }
4033 END_TEST
4034 
4035 START_TEST(test_pi_xmm) {
4036   const char *text = "<?xmm everything like data?><doc/>";
4037   const XML_Char *expected = XCS("xmm: everything like data\n");
4038   CharData storage;
4039 
4040   CharData_Init(&storage);
4041   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4042   XML_SetUserData(g_parser, &storage);
4043   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4044       == XML_STATUS_ERROR)
4045     xml_failure(g_parser);
4046   CharData_CheckXMLChars(&storage, expected);
4047 }
4048 END_TEST
4049 
4050 START_TEST(test_utf16_pi) {
4051   const char text[] =
4052       /* <?{KHO KHWAI}{CHO CHAN}?>
4053        * where {KHO KHWAI} = U+0E04
4054        * and   {CHO CHAN}  = U+0E08
4055        */
4056       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4057       /* <q/> */
4058       "<\0q\0/\0>\0";
4059 #ifdef XML_UNICODE
4060   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4061 #else
4062   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4063 #endif
4064   CharData storage;
4065 
4066   CharData_Init(&storage);
4067   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4068   XML_SetUserData(g_parser, &storage);
4069   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4070       == XML_STATUS_ERROR)
4071     xml_failure(g_parser);
4072   CharData_CheckXMLChars(&storage, expected);
4073 }
4074 END_TEST
4075 
4076 START_TEST(test_utf16_be_pi) {
4077   const char text[] =
4078       /* <?{KHO KHWAI}{CHO CHAN}?>
4079        * where {KHO KHWAI} = U+0E04
4080        * and   {CHO CHAN}  = U+0E08
4081        */
4082       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4083       /* <q/> */
4084       "\0<\0q\0/\0>";
4085 #ifdef XML_UNICODE
4086   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4087 #else
4088   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4089 #endif
4090   CharData storage;
4091 
4092   CharData_Init(&storage);
4093   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4094   XML_SetUserData(g_parser, &storage);
4095   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4096       == XML_STATUS_ERROR)
4097     xml_failure(g_parser);
4098   CharData_CheckXMLChars(&storage, expected);
4099 }
4100 END_TEST
4101 
4102 /* Test that comments can be picked up and translated */
4103 START_TEST(test_utf16_be_comment) {
4104   const char text[] =
4105       /* <!-- Comment A --> */
4106       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4107       /* <doc/> */
4108       "\0<\0d\0o\0c\0/\0>";
4109   const XML_Char *expected = XCS(" Comment A ");
4110   CharData storage;
4111 
4112   CharData_Init(&storage);
4113   XML_SetCommentHandler(g_parser, accumulate_comment);
4114   XML_SetUserData(g_parser, &storage);
4115   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4116       == XML_STATUS_ERROR)
4117     xml_failure(g_parser);
4118   CharData_CheckXMLChars(&storage, expected);
4119 }
4120 END_TEST
4121 
4122 START_TEST(test_utf16_le_comment) {
4123   const char text[] =
4124       /* <!-- Comment B --> */
4125       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4126       /* <doc/> */
4127       "<\0d\0o\0c\0/\0>\0";
4128   const XML_Char *expected = XCS(" Comment B ");
4129   CharData storage;
4130 
4131   CharData_Init(&storage);
4132   XML_SetCommentHandler(g_parser, accumulate_comment);
4133   XML_SetUserData(g_parser, &storage);
4134   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4135       == XML_STATUS_ERROR)
4136     xml_failure(g_parser);
4137   CharData_CheckXMLChars(&storage, expected);
4138 }
4139 END_TEST
4140 
4141 /* Test that the unknown encoding handler with map entries that expect
4142  * conversion but no conversion function is faulted
4143  */
4144 START_TEST(test_missing_encoding_conversion_fn) {
4145   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4146                      "<doc>\x81</doc>";
4147 
4148   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4149   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4150    * character introducing a two-byte sequence.  For this, it
4151    * requires a convert function.  The above function call doesn't
4152    * pass one through, so when BadEncodingHandler actually gets
4153    * called it should supply an invalid encoding.
4154    */
4155   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4156                  "Encoding with missing convert() not faulted");
4157 }
4158 END_TEST
4159 
4160 START_TEST(test_failing_encoding_conversion_fn) {
4161   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4162                      "<doc>\x81</doc>";
4163 
4164   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4165   /* BadEncodingHandler sets up an encoding with every top-bit-set
4166    * character introducing a two-byte sequence.  For this, it
4167    * requires a convert function.  The above function call passes
4168    * one that insists all possible sequences are invalid anyway.
4169    */
4170   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4171                  "Encoding with failing convert() not faulted");
4172 }
4173 END_TEST
4174 
4175 /* Test unknown encoding conversions */
4176 START_TEST(test_unknown_encoding_success) {
4177   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4178                      /* Equivalent to <eoc>Hello, world</eoc> */
4179                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4180 
4181   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4182   run_character_check(text, XCS("Hello, world"));
4183 }
4184 END_TEST
4185 
4186 /* Test bad name character in unknown encoding */
4187 START_TEST(test_unknown_encoding_bad_name) {
4188   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4189                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4190 
4191   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4192   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4193                  "Bad name start in unknown encoding not faulted");
4194 }
4195 END_TEST
4196 
4197 /* Test bad mid-name character in unknown encoding */
4198 START_TEST(test_unknown_encoding_bad_name_2) {
4199   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4200                      "<d\xffoc>Hello, world</d\xffoc>";
4201 
4202   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4203   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4204                  "Bad name in unknown encoding not faulted");
4205 }
4206 END_TEST
4207 
4208 /* Test element name that is long enough to fill the conversion buffer
4209  * in an unknown encoding, finishing with an encoded character.
4210  */
4211 START_TEST(test_unknown_encoding_long_name_1) {
4212   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4213                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4214                      "Hi"
4215                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4216   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4217   CharData storage;
4218 
4219   CharData_Init(&storage);
4220   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4221   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4222   XML_SetUserData(g_parser, &storage);
4223   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4224       == XML_STATUS_ERROR)
4225     xml_failure(g_parser);
4226   CharData_CheckXMLChars(&storage, expected);
4227 }
4228 END_TEST
4229 
4230 /* Test element name that is long enough to fill the conversion buffer
4231  * in an unknown encoding, finishing with an simple character.
4232  */
4233 START_TEST(test_unknown_encoding_long_name_2) {
4234   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4235                      "<abcdefghabcdefghabcdefghijklmnop>"
4236                      "Hi"
4237                      "</abcdefghabcdefghabcdefghijklmnop>";
4238   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4239   CharData storage;
4240 
4241   CharData_Init(&storage);
4242   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4243   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4244   XML_SetUserData(g_parser, &storage);
4245   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4246       == XML_STATUS_ERROR)
4247     xml_failure(g_parser);
4248   CharData_CheckXMLChars(&storage, expected);
4249 }
4250 END_TEST
4251 
4252 START_TEST(test_invalid_unknown_encoding) {
4253   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4254                      "<doc>Hello world</doc>";
4255 
4256   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4257   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4258                  "Invalid unknown encoding not faulted");
4259 }
4260 END_TEST
4261 
4262 START_TEST(test_unknown_ascii_encoding_ok) {
4263   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4264                      "<doc>Hello, world</doc>";
4265 
4266   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4267   run_character_check(text, XCS("Hello, world"));
4268 }
4269 END_TEST
4270 
4271 START_TEST(test_unknown_ascii_encoding_fail) {
4272   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4273                      "<doc>Hello, \x80 world</doc>";
4274 
4275   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4276   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4277                  "Invalid character not faulted");
4278 }
4279 END_TEST
4280 
4281 START_TEST(test_unknown_encoding_invalid_length) {
4282   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4283                      "<doc>Hello, world</doc>";
4284 
4285   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4286   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4287                  "Invalid unknown encoding not faulted");
4288 }
4289 END_TEST
4290 
4291 START_TEST(test_unknown_encoding_invalid_topbit) {
4292   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4293                      "<doc>Hello, world</doc>";
4294 
4295   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4296   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4297                  "Invalid unknown encoding not faulted");
4298 }
4299 END_TEST
4300 
4301 START_TEST(test_unknown_encoding_invalid_surrogate) {
4302   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4303                      "<doc>Hello, \x82 world</doc>";
4304 
4305   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4306   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4307                  "Invalid unknown encoding not faulted");
4308 }
4309 END_TEST
4310 
4311 START_TEST(test_unknown_encoding_invalid_high) {
4312   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4313                      "<doc>Hello, world</doc>";
4314 
4315   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4316   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4317                  "Invalid unknown encoding not faulted");
4318 }
4319 END_TEST
4320 
4321 START_TEST(test_unknown_encoding_invalid_attr_value) {
4322   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4323                      "<doc attr='\xff\x30'/>";
4324 
4325   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4326   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4327                  "Invalid attribute valid not faulted");
4328 }
4329 END_TEST
4330 
4331 /* Test an external entity parser set to use latin-1 detects UTF-16
4332  * BOMs correctly.
4333  */
4334 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
4335 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4336   const char *text = "<!DOCTYPE doc [\n"
4337                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4338                      "]>\n"
4339                      "<doc>&en;</doc>";
4340   ExtTest2 test_data
4341       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4342          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4343           *   0x4c = L and 0x20 is a space
4344           */
4345          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4346 #ifdef XML_UNICODE
4347   const XML_Char *expected = XCS("\x00ff\x00feL ");
4348 #else
4349   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4350   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4351 #endif
4352   CharData storage;
4353 
4354   CharData_Init(&storage);
4355   test_data.storage = &storage;
4356   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4357   XML_SetUserData(g_parser, &test_data);
4358   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4359   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4360       == XML_STATUS_ERROR)
4361     xml_failure(g_parser);
4362   CharData_CheckXMLChars(&storage, expected);
4363 }
4364 END_TEST
4365 
4366 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4367   const char *text = "<!DOCTYPE doc [\n"
4368                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4369                      "]>\n"
4370                      "<doc>&en;</doc>";
4371   ExtTest2 test_data
4372       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4373          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4374           *   0x4c = L and 0x20 is a space
4375           */
4376          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4377 #ifdef XML_UNICODE
4378   const XML_Char *expected = XCS("\x00fe\x00ff L");
4379 #else
4380   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4381   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4382 #endif
4383   CharData storage;
4384 
4385   CharData_Init(&storage);
4386   test_data.storage = &storage;
4387   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4388   XML_SetUserData(g_parser, &test_data);
4389   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4390   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4391       == XML_STATUS_ERROR)
4392     xml_failure(g_parser);
4393   CharData_CheckXMLChars(&storage, expected);
4394 }
4395 END_TEST
4396 
4397 /* Parsing the full buffer rather than a byte at a time makes a
4398  * difference to the encoding scanning code, so repeat the above tests
4399  * without breaking them down by byte.
4400  */
4401 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4402   const char *text = "<!DOCTYPE doc [\n"
4403                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4404                      "]>\n"
4405                      "<doc>&en;</doc>";
4406   ExtTest2 test_data
4407       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4408          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4409           *   0x4c = L and 0x20 is a space
4410           */
4411          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4412 #ifdef XML_UNICODE
4413   const XML_Char *expected = XCS("\x00ff\x00feL ");
4414 #else
4415   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4416   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4417 #endif
4418   CharData storage;
4419 
4420   CharData_Init(&storage);
4421   test_data.storage = &storage;
4422   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4423   XML_SetUserData(g_parser, &test_data);
4424   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4425   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4426       == XML_STATUS_ERROR)
4427     xml_failure(g_parser);
4428   CharData_CheckXMLChars(&storage, expected);
4429 }
4430 END_TEST
4431 
4432 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4433   const char *text = "<!DOCTYPE doc [\n"
4434                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4435                      "]>\n"
4436                      "<doc>&en;</doc>";
4437   ExtTest2 test_data
4438       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4439          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4440           *   0x4c = L and 0x20 is a space
4441           */
4442          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4443 #ifdef XML_UNICODE
4444   const XML_Char *expected = XCS("\x00fe\x00ff L");
4445 #else
4446   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4447   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4448 #endif
4449   CharData storage;
4450 
4451   CharData_Init(&storage);
4452   test_data.storage = &storage;
4453   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4454   XML_SetUserData(g_parser, &test_data);
4455   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4456   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4457       == XML_STATUS_ERROR)
4458     xml_failure(g_parser);
4459   CharData_CheckXMLChars(&storage, expected);
4460 }
4461 END_TEST
4462 
4463 /* Test little-endian UTF-16 given an explicit big-endian encoding */
4464 START_TEST(test_ext_entity_utf16_be) {
4465   const char *text = "<!DOCTYPE doc [\n"
4466                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4467                      "]>\n"
4468                      "<doc>&en;</doc>";
4469   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4470 #ifdef XML_UNICODE
4471   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4472 #else
4473   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4474                                  "\xe6\x94\x80"   /* U+6500 */
4475                                  "\xe2\xbc\x80"   /* U+2F00 */
4476                                  "\xe3\xb8\x80"); /* U+3E00 */
4477 #endif
4478   CharData storage;
4479 
4480   CharData_Init(&storage);
4481   test_data.storage = &storage;
4482   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4483   XML_SetUserData(g_parser, &test_data);
4484   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4485   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4486       == XML_STATUS_ERROR)
4487     xml_failure(g_parser);
4488   CharData_CheckXMLChars(&storage, expected);
4489 }
4490 END_TEST
4491 
4492 /* Test big-endian UTF-16 given an explicit little-endian encoding */
4493 START_TEST(test_ext_entity_utf16_le) {
4494   const char *text = "<!DOCTYPE doc [\n"
4495                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4496                      "]>\n"
4497                      "<doc>&en;</doc>";
4498   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4499 #ifdef XML_UNICODE
4500   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4501 #else
4502   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4503                                  "\xe6\x94\x80"   /* U+6500 */
4504                                  "\xe2\xbc\x80"   /* U+2F00 */
4505                                  "\xe3\xb8\x80"); /* U+3E00 */
4506 #endif
4507   CharData storage;
4508 
4509   CharData_Init(&storage);
4510   test_data.storage = &storage;
4511   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4512   XML_SetUserData(g_parser, &test_data);
4513   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4514   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4515       == XML_STATUS_ERROR)
4516     xml_failure(g_parser);
4517   CharData_CheckXMLChars(&storage, expected);
4518 }
4519 END_TEST
4520 
4521 /* Test little-endian UTF-16 given no explicit encoding.
4522  * The existing default encoding (UTF-8) is assumed to hold without a
4523  * BOM to contradict it, so the entity value will in fact provoke an
4524  * error because 0x00 is not a valid XML character.  We parse the
4525  * whole buffer in one go rather than feeding it in byte by byte to
4526  * exercise different code paths in the initial scanning routines.
4527  */
4528 START_TEST(test_ext_entity_utf16_unknown) {
4529   const char *text = "<!DOCTYPE doc [\n"
4530                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4531                      "]>\n"
4532                      "<doc>&en;</doc>";
4533   ExtFaults2 test_data
4534       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4535          XML_ERROR_INVALID_TOKEN};
4536 
4537   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4538   XML_SetUserData(g_parser, &test_data);
4539   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4540                  "Invalid character should not have been accepted");
4541 }
4542 END_TEST
4543 
4544 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
4545 START_TEST(test_ext_entity_utf8_non_bom) {
4546   const char *text = "<!DOCTYPE doc [\n"
4547                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4548                      "]>\n"
4549                      "<doc>&en;</doc>";
4550   ExtTest2 test_data
4551       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4552          3, NULL, NULL};
4553 #ifdef XML_UNICODE
4554   const XML_Char *expected = XCS("\xfec0");
4555 #else
4556   const XML_Char *expected = XCS("\xef\xbb\x80");
4557 #endif
4558   CharData storage;
4559 
4560   CharData_Init(&storage);
4561   test_data.storage = &storage;
4562   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4563   XML_SetUserData(g_parser, &test_data);
4564   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4565   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4566       == XML_STATUS_ERROR)
4567     xml_failure(g_parser);
4568   CharData_CheckXMLChars(&storage, expected);
4569 }
4570 END_TEST
4571 
4572 /* Test that UTF-8 in a CDATA section is correctly passed through */
4573 START_TEST(test_utf8_in_cdata_section) {
4574   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4575 #ifdef XML_UNICODE
4576   const XML_Char *expected = XCS("one \x00e9 two");
4577 #else
4578   const XML_Char *expected = XCS("one \xc3\xa9 two");
4579 #endif
4580 
4581   run_character_check(text, expected);
4582 }
4583 END_TEST
4584 
4585 /* Test that little-endian UTF-16 in a CDATA section is handled */
4586 START_TEST(test_utf8_in_cdata_section_2) {
4587   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4588 #ifdef XML_UNICODE
4589   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4590 #else
4591   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4592 #endif
4593 
4594   run_character_check(text, expected);
4595 }
4596 END_TEST
4597 
4598 START_TEST(test_utf8_in_start_tags) {
4599   struct test_case {
4600     bool goodName;
4601     bool goodNameStart;
4602     const char *tagName;
4603   };
4604 
4605   // The idea with the tests below is this:
4606   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4607   // go to isNever and are hence not a concern.
4608   //
4609   // We start with a character that is a valid name character
4610   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4611   // single bits at places where (1) the result leaves the UTF-8 encoding space
4612   // and (2) we stay in the same n-byte sequence family.
4613   //
4614   // The flipped bits are highlighted in angle brackets in comments,
4615   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4616   // the most significant bit to 1 to leave UTF-8 encoding space.
4617   struct test_case cases[] = {
4618       // 1-byte UTF-8: [0xxx xxxx]
4619       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4620       {false, false, "\xBA"}, // [<1>011 1010]
4621       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4622       {false, false, "\xB9"}, // [<1>011 1001]
4623 
4624       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4625       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4626                                   // Arabic small waw U+06E5
4627       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4628       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4629       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4630       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4631                                   // combining char U+0301
4632       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4633       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4634       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4635 
4636       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4637       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4638                                       // Devanagari Letter A U+0905
4639       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4640       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4641       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4642       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4643       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4644       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4645                                       // combining char U+0901
4646       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4647       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4648       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4649       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4650       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4651   };
4652   const bool atNameStart[] = {true, false};
4653 
4654   size_t i = 0;
4655   char doc[1024];
4656   size_t failCount = 0;
4657 
4658   // we need all the bytes to be parsed, but we don't want the errors that can
4659   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4660   if (g_reparseDeferralEnabledDefault) {
4661     return;
4662   }
4663 
4664   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4665     size_t j = 0;
4666     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4667       const bool expectedSuccess
4668           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4669       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4670                cases[i].tagName);
4671       XML_Parser parser = XML_ParserCreate(NULL);
4672 
4673       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4674           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4675 
4676       bool success = true;
4677       if ((status == XML_STATUS_OK) != expectedSuccess) {
4678         success = false;
4679       }
4680       if ((status == XML_STATUS_ERROR)
4681           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4682         success = false;
4683       }
4684 
4685       if (! success) {
4686         fprintf(
4687             stderr,
4688             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4689             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4690             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4691         failCount++;
4692       }
4693 
4694       XML_ParserFree(parser);
4695     }
4696   }
4697 
4698   if (failCount > 0) {
4699     fail("UTF-8 regression detected");
4700   }
4701 }
4702 END_TEST
4703 
4704 /* Test trailing spaces in elements are accepted */
4705 START_TEST(test_trailing_spaces_in_elements) {
4706   const char *text = "<doc   >Hi</doc >";
4707   const XML_Char *expected = XCS("doc/doc");
4708   CharData storage;
4709 
4710   CharData_Init(&storage);
4711   XML_SetElementHandler(g_parser, record_element_start_handler,
4712                         record_element_end_handler);
4713   XML_SetUserData(g_parser, &storage);
4714   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4715       == XML_STATUS_ERROR)
4716     xml_failure(g_parser);
4717   CharData_CheckXMLChars(&storage, expected);
4718 }
4719 END_TEST
4720 
4721 START_TEST(test_utf16_attribute) {
4722   const char text[] =
4723       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4724        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4725        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4726        */
4727       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4728   const XML_Char *expected = XCS("a");
4729   CharData storage;
4730 
4731   CharData_Init(&storage);
4732   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4733   XML_SetUserData(g_parser, &storage);
4734   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4735       == XML_STATUS_ERROR)
4736     xml_failure(g_parser);
4737   CharData_CheckXMLChars(&storage, expected);
4738 }
4739 END_TEST
4740 
4741 START_TEST(test_utf16_second_attr) {
4742   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4743    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4744    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4745    */
4746   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4747                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4748   const XML_Char *expected = XCS("1");
4749   CharData storage;
4750 
4751   CharData_Init(&storage);
4752   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4753   XML_SetUserData(g_parser, &storage);
4754   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4755       == XML_STATUS_ERROR)
4756     xml_failure(g_parser);
4757   CharData_CheckXMLChars(&storage, expected);
4758 }
4759 END_TEST
4760 
4761 START_TEST(test_attr_after_solidus) {
4762   const char *text = "<doc attr1='a' / attr2='b'>";
4763 
4764   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4765 }
4766 END_TEST
4767 
4768 START_TEST(test_utf16_pe) {
4769   /* <!DOCTYPE doc [
4770    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4771    * %{KHO KHWAI}{CHO CHAN};
4772    * ]>
4773    * <doc></doc>
4774    *
4775    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4776    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4777    */
4778   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4779                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4780                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4781                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4782                       "\0%\x0e\x04\x0e\x08\0;\0\n"
4783                       "\0]\0>\0\n"
4784                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4785 #ifdef XML_UNICODE
4786   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4787 #else
4788   const XML_Char *expected
4789       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4790 #endif
4791   CharData storage;
4792 
4793   CharData_Init(&storage);
4794   XML_SetUserData(g_parser, &storage);
4795   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4796   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4797       == XML_STATUS_ERROR)
4798     xml_failure(g_parser);
4799   CharData_CheckXMLChars(&storage, expected);
4800 }
4801 END_TEST
4802 
4803 /* Test that duff attribute description keywords are rejected */
4804 START_TEST(test_bad_attr_desc_keyword) {
4805   const char *text = "<!DOCTYPE doc [\n"
4806                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4807                      "]>\n"
4808                      "<doc />";
4809 
4810   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4811                  "Bad keyword !IMPLIED not faulted");
4812 }
4813 END_TEST
4814 
4815 /* Test that an invalid attribute description keyword consisting of
4816  * UTF-16 characters with their top bytes non-zero are correctly
4817  * faulted
4818  */
4819 START_TEST(test_bad_attr_desc_keyword_utf16) {
4820   /* <!DOCTYPE d [
4821    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4822    * ]><d/>
4823    *
4824    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4825    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4826    */
4827   const char text[]
4828       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4829         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4830         "\0#\x0e\x04\x0e\x08\0>\0\n"
4831         "\0]\0>\0<\0d\0/\0>";
4832 
4833   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4834       != XML_STATUS_ERROR)
4835     fail("Invalid UTF16 attribute keyword not faulted");
4836   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4837     xml_failure(g_parser);
4838 }
4839 END_TEST
4840 
4841 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
4842  * using prefix-encoding (see above) to trigger specific code paths
4843  */
4844 START_TEST(test_bad_doctype) {
4845   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4846                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4847 
4848   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4849   expect_failure(text, XML_ERROR_SYNTAX,
4850                  "Invalid bytes in DOCTYPE not faulted");
4851 }
4852 END_TEST
4853 
4854 START_TEST(test_bad_doctype_utf8) {
4855   const char *text = "<!DOCTYPE \xDB\x25"
4856                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
4857   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4858                  "Invalid UTF-8 in DOCTYPE not faulted");
4859 }
4860 END_TEST
4861 
4862 START_TEST(test_bad_doctype_utf16) {
4863   const char text[] =
4864       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4865        *
4866        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4867        * (name character) but not a valid letter (name start character)
4868        */
4869       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4870       "\x06\xf2"
4871       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4872 
4873   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4874       != XML_STATUS_ERROR)
4875     fail("Invalid bytes in DOCTYPE not faulted");
4876   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4877     xml_failure(g_parser);
4878 }
4879 END_TEST
4880 
4881 START_TEST(test_bad_doctype_plus) {
4882   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4883                      "<1+>&foo;</1+>";
4884 
4885   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4886                  "'+' in document name not faulted");
4887 }
4888 END_TEST
4889 
4890 START_TEST(test_bad_doctype_star) {
4891   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4892                      "<1*>&foo;</1*>";
4893 
4894   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4895                  "'*' in document name not faulted");
4896 }
4897 END_TEST
4898 
4899 START_TEST(test_bad_doctype_query) {
4900   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
4901                      "<1?>&foo;</1?>";
4902 
4903   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4904                  "'?' in document name not faulted");
4905 }
4906 END_TEST
4907 
4908 START_TEST(test_unknown_encoding_bad_ignore) {
4909   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
4910                      "<!DOCTYPE doc SYSTEM 'foo'>"
4911                      "<doc><e>&entity;</e></doc>";
4912   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
4913                      "Invalid character not faulted", XCS("prefix-conv"),
4914                      XML_ERROR_INVALID_TOKEN};
4915 
4916   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4917   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4918   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4919   XML_SetUserData(g_parser, &fault);
4920   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4921                  "Bad IGNORE section with unknown encoding not failed");
4922 }
4923 END_TEST
4924 
4925 START_TEST(test_entity_in_utf16_be_attr) {
4926   const char text[] =
4927       /* <e a='&#228; &#x00E4;'></e> */
4928       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
4929       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
4930 #ifdef XML_UNICODE
4931   const XML_Char *expected = XCS("\x00e4 \x00e4");
4932 #else
4933   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4934 #endif
4935   CharData storage;
4936 
4937   CharData_Init(&storage);
4938   XML_SetUserData(g_parser, &storage);
4939   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4940   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4941       == XML_STATUS_ERROR)
4942     xml_failure(g_parser);
4943   CharData_CheckXMLChars(&storage, expected);
4944 }
4945 END_TEST
4946 
4947 START_TEST(test_entity_in_utf16_le_attr) {
4948   const char text[] =
4949       /* <e a='&#228; &#x00E4;'></e> */
4950       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
4951       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
4952 #ifdef XML_UNICODE
4953   const XML_Char *expected = XCS("\x00e4 \x00e4");
4954 #else
4955   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4956 #endif
4957   CharData storage;
4958 
4959   CharData_Init(&storage);
4960   XML_SetUserData(g_parser, &storage);
4961   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4962   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4963       == XML_STATUS_ERROR)
4964     xml_failure(g_parser);
4965   CharData_CheckXMLChars(&storage, expected);
4966 }
4967 END_TEST
4968 
4969 START_TEST(test_entity_public_utf16_be) {
4970   const char text[] =
4971       /* <!DOCTYPE d [ */
4972       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4973       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
4974       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
4975       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
4976       /* %e; */
4977       "\0%\0e\0;\0\n"
4978       /* ]> */
4979       "\0]\0>\0\n"
4980       /* <d>&j;</d> */
4981       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
4982   ExtTest2 test_data
4983       = {/* <!ENTITY j 'baz'> */
4984          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
4985   const XML_Char *expected = XCS("baz");
4986   CharData storage;
4987 
4988   CharData_Init(&storage);
4989   test_data.storage = &storage;
4990   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4991   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4992   XML_SetUserData(g_parser, &test_data);
4993   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4994   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4995       == XML_STATUS_ERROR)
4996     xml_failure(g_parser);
4997   CharData_CheckXMLChars(&storage, expected);
4998 }
4999 END_TEST
5000 
5001 START_TEST(test_entity_public_utf16_le) {
5002   const char text[] =
5003       /* <!DOCTYPE d [ */
5004       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5005       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5006       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5007       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5008       /* %e; */
5009       "%\0e\0;\0\n\0"
5010       /* ]> */
5011       "]\0>\0\n\0"
5012       /* <d>&j;</d> */
5013       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5014   ExtTest2 test_data
5015       = {/* <!ENTITY j 'baz'> */
5016          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5017   const XML_Char *expected = XCS("baz");
5018   CharData storage;
5019 
5020   CharData_Init(&storage);
5021   test_data.storage = &storage;
5022   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5023   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5024   XML_SetUserData(g_parser, &test_data);
5025   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5026   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5027       == XML_STATUS_ERROR)
5028     xml_failure(g_parser);
5029   CharData_CheckXMLChars(&storage, expected);
5030 }
5031 END_TEST
5032 
5033 /* Test that a doctype with neither an internal nor external subset is
5034  * faulted
5035  */
5036 START_TEST(test_short_doctype) {
5037   const char *text = "<!DOCTYPE doc></doc>";
5038   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5039                  "DOCTYPE without subset not rejected");
5040 }
5041 END_TEST
5042 
5043 START_TEST(test_short_doctype_2) {
5044   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5045   expect_failure(text, XML_ERROR_SYNTAX,
5046                  "DOCTYPE without Public ID not rejected");
5047 }
5048 END_TEST
5049 
5050 START_TEST(test_short_doctype_3) {
5051   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5052   expect_failure(text, XML_ERROR_SYNTAX,
5053                  "DOCTYPE without System ID not rejected");
5054 }
5055 END_TEST
5056 
5057 START_TEST(test_long_doctype) {
5058   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5059   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5060 }
5061 END_TEST
5062 
5063 START_TEST(test_bad_entity) {
5064   const char *text = "<!DOCTYPE doc [\n"
5065                      "  <!ENTITY foo PUBLIC>\n"
5066                      "]>\n"
5067                      "<doc/>";
5068   expect_failure(text, XML_ERROR_SYNTAX,
5069                  "ENTITY without Public ID is not rejected");
5070 }
5071 END_TEST
5072 
5073 /* Test unquoted value is faulted */
5074 START_TEST(test_bad_entity_2) {
5075   const char *text = "<!DOCTYPE doc [\n"
5076                      "  <!ENTITY % foo bar>\n"
5077                      "]>\n"
5078                      "<doc/>";
5079   expect_failure(text, XML_ERROR_SYNTAX,
5080                  "ENTITY without Public ID is not rejected");
5081 }
5082 END_TEST
5083 
5084 START_TEST(test_bad_entity_3) {
5085   const char *text = "<!DOCTYPE doc [\n"
5086                      "  <!ENTITY % foo PUBLIC>\n"
5087                      "]>\n"
5088                      "<doc/>";
5089   expect_failure(text, XML_ERROR_SYNTAX,
5090                  "Parameter ENTITY without Public ID is not rejected");
5091 }
5092 END_TEST
5093 
5094 START_TEST(test_bad_entity_4) {
5095   const char *text = "<!DOCTYPE doc [\n"
5096                      "  <!ENTITY % foo SYSTEM>\n"
5097                      "]>\n"
5098                      "<doc/>";
5099   expect_failure(text, XML_ERROR_SYNTAX,
5100                  "Parameter ENTITY without Public ID is not rejected");
5101 }
5102 END_TEST
5103 
5104 START_TEST(test_bad_notation) {
5105   const char *text = "<!DOCTYPE doc [\n"
5106                      "  <!NOTATION n SYSTEM>\n"
5107                      "]>\n"
5108                      "<doc/>";
5109   expect_failure(text, XML_ERROR_SYNTAX,
5110                  "Notation without System ID is not rejected");
5111 }
5112 END_TEST
5113 
5114 /* Test for issue #11, wrongly suppressed default handler */
5115 START_TEST(test_default_doctype_handler) {
5116   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5117                      "  <!ENTITY foo 'bar'>\n"
5118                      "]>\n"
5119                      "<doc>&foo;</doc>";
5120   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5121                               {XCS("'test.dtd'"), 10, XML_FALSE},
5122                               {NULL, 0, XML_FALSE}};
5123   int i;
5124 
5125   XML_SetUserData(g_parser, &test_data);
5126   XML_SetDefaultHandler(g_parser, checking_default_handler);
5127   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5128   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5129       == XML_STATUS_ERROR)
5130     xml_failure(g_parser);
5131   for (i = 0; test_data[i].expected != NULL; i++)
5132     if (! test_data[i].seen)
5133       fail("Default handler not run for public !DOCTYPE");
5134 }
5135 END_TEST
5136 
5137 START_TEST(test_empty_element_abort) {
5138   const char *text = "<abort/>";
5139 
5140   XML_SetStartElementHandler(g_parser, start_element_suspender);
5141   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5142       != XML_STATUS_ERROR)
5143     fail("Expected to error on abort");
5144 }
5145 END_TEST
5146 
5147 /* Regression test for GH issue #612: unfinished m_declAttributeType
5148  * allocation in ->m_tempPool can corrupt following allocation.
5149  */
5150 START_TEST(test_pool_integrity_with_unfinished_attr) {
5151   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5152                      "<!DOCTYPE foo [\n"
5153                      "<!ELEMENT foo ANY>\n"
5154                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5155                      "%entp;\n"
5156                      "]>\n"
5157                      "<a></a>\n";
5158   const XML_Char *expected = XCS("COMMENT");
5159   CharData storage;
5160 
5161   CharData_Init(&storage);
5162   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5163   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5164   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5165   XML_SetCommentHandler(g_parser, accumulate_comment);
5166   XML_SetUserData(g_parser, &storage);
5167   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5168       == XML_STATUS_ERROR)
5169     xml_failure(g_parser);
5170   CharData_CheckXMLChars(&storage, expected);
5171 }
5172 END_TEST
5173 
5174 START_TEST(test_nested_entity_suspend) {
5175   const char *const text = "<!DOCTYPE a [\n"
5176                            "  <!ENTITY e1 '<!--e1-->'>\n"
5177                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5178                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5179                            "]>\n"
5180                            "<a><!--start-->&e3;<!--end--></a>";
5181   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5182       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5183   CharData storage;
5184   CharData_Init(&storage);
5185   XML_Parser parser = XML_ParserCreate(NULL);
5186   ParserPlusStorage parserPlusStorage = {parser, &storage};
5187 
5188   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5189   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5190   XML_SetUserData(parser, &parserPlusStorage);
5191 
5192   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5193   while (status == XML_STATUS_SUSPENDED) {
5194     status = XML_ResumeParser(parser);
5195   }
5196   if (status != XML_STATUS_OK)
5197     xml_failure(parser);
5198 
5199   CharData_CheckXMLChars(&storage, expected);
5200   XML_ParserFree(parser);
5201 }
5202 END_TEST
5203 
5204 /* Regression test for quadratic parsing on large tokens */
5205 START_TEST(test_big_tokens_take_linear_time) {
5206   const char *const too_slow_failure_message
5207       = "Compared to the baseline runtime of the first test, this test has a "
5208         "slowdown of more than <max_slowdown>. "
5209         "Please keep increasing the value by 1 until it reliably passes the "
5210         "test on your hardware and open a bug sharing that number with us. "
5211         "Thanks in advance!";
5212   const struct {
5213     const char *pre;
5214     const char *post;
5215   } text[] = {
5216       {"<a>", "</a>"},                      // assumed good, used as baseline
5217       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5218       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5219       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5220       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5221   };
5222   const int num_cases = sizeof(text) / sizeof(text[0]);
5223   // For the test we need a <max_slowdown> value that is:
5224   // (1) big enough that the test passes reliably (avoiding flaky tests), and
5225   // (2) small enough that the test actually catches regressions.
5226   const int max_slowdown = 15;
5227   char aaaaaa[4096];
5228   const int fillsize = (int)sizeof(aaaaaa);
5229   const int fillcount = 100;
5230 
5231   memset(aaaaaa, 'a', fillsize);
5232 
5233   if (! g_reparseDeferralEnabledDefault) {
5234     return; // heuristic is disabled; we would get O(n^2) and fail.
5235   }
5236 #if ! defined(__linux__)
5237   if (CLOCKS_PER_SEC < 100000) {
5238     // Skip this test if clock() doesn't have reasonably good resolution.
5239     // This workaround is primarily targeting Windows and FreeBSD, since
5240     // XSI requires the value to be 1.000.000 (10x the condition here), and
5241     // we want to be very sure that at least one platform in CI can catch
5242     // regressions (through a failing test).
5243     return;
5244   }
5245 #endif
5246 
5247   clock_t baseline = 0;
5248   for (int i = 0; i < num_cases; ++i) {
5249     XML_Parser parser = XML_ParserCreate(NULL);
5250     assert_true(parser != NULL);
5251     enum XML_Status status;
5252     set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
5253                 text[i].pre, text[i].post);
5254     const clock_t start = clock();
5255 
5256     // parse the start text
5257     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5258                                      (int)strlen(text[i].pre), XML_FALSE);
5259     if (status != XML_STATUS_OK) {
5260       xml_failure(parser);
5261     }
5262     // parse lots of 'a', failing the test early if it takes too long
5263     for (int f = 0; f < fillcount; ++f) {
5264       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5265       if (status != XML_STATUS_OK) {
5266         xml_failure(parser);
5267       }
5268       // i == 0 means we're still calculating the baseline value
5269       if (i > 0) {
5270         const clock_t now = clock();
5271         const clock_t clocks_so_far = now - start;
5272         const int slowdown = clocks_so_far / baseline;
5273         if (slowdown >= max_slowdown) {
5274           fprintf(
5275               stderr,
5276               "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
5277               f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
5278           fail(too_slow_failure_message);
5279         }
5280       }
5281     }
5282     // parse the end text
5283     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5284                                      (int)strlen(text[i].post), XML_TRUE);
5285     if (status != XML_STATUS_OK) {
5286       xml_failure(parser);
5287     }
5288 
5289     // how long did it take in total?
5290     const clock_t end = clock();
5291     const clock_t taken = end - start;
5292     if (i == 0) {
5293       assert_true(taken > 0); // just to make sure we don't div-by-0 later
5294       baseline = taken;
5295     }
5296     const int slowdown = taken / baseline;
5297     if (slowdown >= max_slowdown) {
5298       fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
5299               (int)taken, (int)baseline, slowdown, max_slowdown);
5300       fail(too_slow_failure_message);
5301     }
5302 
5303     XML_ParserFree(parser);
5304   }
5305 }
5306 END_TEST
5307 
5308 START_TEST(test_set_reparse_deferral) {
5309   const char *const pre = "<d>";
5310   const char *const start = "<x attr='";
5311   const char *const end = "'></x>";
5312   char eeeeee[100];
5313   const int fillsize = (int)sizeof(eeeeee);
5314   memset(eeeeee, 'e', fillsize);
5315 
5316   for (int enabled = 0; enabled <= 1; enabled += 1) {
5317     set_subtest("deferral=%d", enabled);
5318 
5319     XML_Parser parser = XML_ParserCreate(NULL);
5320     assert_true(parser != NULL);
5321     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5322     // pre-grow the buffer to avoid reparsing due to almost-fullness
5323     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5324 
5325     CharData storage;
5326     CharData_Init(&storage);
5327     XML_SetUserData(parser, &storage);
5328     XML_SetStartElementHandler(parser, start_element_event_handler);
5329 
5330     enum XML_Status status;
5331     // parse the start text
5332     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5333     if (status != XML_STATUS_OK) {
5334       xml_failure(parser);
5335     }
5336     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5337 
5338     // ..and the start of the token
5339     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5340     if (status != XML_STATUS_OK) {
5341       xml_failure(parser);
5342     }
5343     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5344 
5345     // try to parse lots of 'e', but the token isn't finished
5346     for (int c = 0; c < 100; ++c) {
5347       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5348       if (status != XML_STATUS_OK) {
5349         xml_failure(parser);
5350       }
5351     }
5352     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5353 
5354     // end the <x> token.
5355     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5356     if (status != XML_STATUS_OK) {
5357       xml_failure(parser);
5358     }
5359 
5360     if (enabled) {
5361       // In general, we may need to push more data to trigger a reparse attempt,
5362       // but in this test, the data is constructed to always require it.
5363       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5364       // 2x the token length should suffice; the +1 covers the start and end.
5365       for (int c = 0; c < 101; ++c) {
5366         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5367         if (status != XML_STATUS_OK) {
5368           xml_failure(parser);
5369         }
5370       }
5371     }
5372     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5373 
5374     XML_ParserFree(parser);
5375   }
5376 }
5377 END_TEST
5378 
5379 struct element_decl_data {
5380   XML_Parser parser;
5381   int count;
5382 };
5383 
5384 static void
5385 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5386   UNUSED_P(name);
5387   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5388   testdata->count += 1;
5389   XML_FreeContentModel(testdata->parser, model);
5390 }
5391 
5392 static int
5393 external_inherited_parser(XML_Parser p, const XML_Char *context,
5394                           const XML_Char *base, const XML_Char *systemId,
5395                           const XML_Char *publicId) {
5396   UNUSED_P(base);
5397   UNUSED_P(systemId);
5398   UNUSED_P(publicId);
5399   const char *const pre = "<!ELEMENT document ANY>\n";
5400   const char *const start = "<!ELEMENT ";
5401   const char *const end = " ANY>\n";
5402   const char *const post = "<!ELEMENT xyz ANY>\n";
5403   const int enabled = *(int *)XML_GetUserData(p);
5404   char eeeeee[100];
5405   char spaces[100];
5406   const int fillsize = (int)sizeof(eeeeee);
5407   assert_true(fillsize == (int)sizeof(spaces));
5408   memset(eeeeee, 'e', fillsize);
5409   memset(spaces, ' ', fillsize);
5410 
5411   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5412   assert_true(parser != NULL);
5413   // pre-grow the buffer to avoid reparsing due to almost-fullness
5414   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5415 
5416   struct element_decl_data testdata;
5417   testdata.parser = parser;
5418   testdata.count = 0;
5419   XML_SetUserData(parser, &testdata);
5420   XML_SetElementDeclHandler(parser, element_decl_counter);
5421 
5422   enum XML_Status status;
5423   // parse the initial text
5424   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5425   if (status != XML_STATUS_OK) {
5426     xml_failure(parser);
5427   }
5428   assert_true(testdata.count == 1); // first element should be done
5429 
5430   // ..and the start of the big token
5431   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5432   if (status != XML_STATUS_OK) {
5433     xml_failure(parser);
5434   }
5435   assert_true(testdata.count == 1); // still just the first one
5436 
5437   // try to parse lots of 'e', but the token isn't finished
5438   for (int c = 0; c < 100; ++c) {
5439     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5440     if (status != XML_STATUS_OK) {
5441       xml_failure(parser);
5442     }
5443   }
5444   assert_true(testdata.count == 1); // *still* just the first one
5445 
5446   // end the big token.
5447   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5448   if (status != XML_STATUS_OK) {
5449     xml_failure(parser);
5450   }
5451 
5452   if (enabled) {
5453     // In general, we may need to push more data to trigger a reparse attempt,
5454     // but in this test, the data is constructed to always require it.
5455     assert_true(testdata.count == 1); // or the test is incorrect
5456     // 2x the token length should suffice; the +1 covers the start and end.
5457     for (int c = 0; c < 101; ++c) {
5458       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5459       if (status != XML_STATUS_OK) {
5460         xml_failure(parser);
5461       }
5462     }
5463   }
5464   assert_true(testdata.count == 2); // the big token should be done
5465 
5466   // parse the final text
5467   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5468   if (status != XML_STATUS_OK) {
5469     xml_failure(parser);
5470   }
5471   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5472 
5473   XML_ParserFree(parser);
5474   return XML_STATUS_OK;
5475 }
5476 
5477 START_TEST(test_reparse_deferral_is_inherited) {
5478   const char *const text
5479       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5480   for (int enabled = 0; enabled <= 1; ++enabled) {
5481     set_subtest("deferral=%d", enabled);
5482 
5483     XML_Parser parser = XML_ParserCreate(NULL);
5484     assert_true(parser != NULL);
5485     XML_SetUserData(parser, (void *)&enabled);
5486     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5487     // this handler creates a sub-parser and checks that its deferral behavior
5488     // is what we expected, based on the value of `enabled` (in userdata).
5489     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5490     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5491     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5492       xml_failure(parser);
5493 
5494     XML_ParserFree(parser);
5495   }
5496 }
5497 END_TEST
5498 
5499 START_TEST(test_set_reparse_deferral_on_null_parser) {
5500   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5501   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5502   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5503   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5504   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5505               == XML_FALSE);
5506   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5507               == XML_FALSE);
5508 }
5509 END_TEST
5510 
5511 START_TEST(test_set_reparse_deferral_on_the_fly) {
5512   const char *const pre = "<d><x attr='";
5513   const char *const end = "'></x>";
5514   char iiiiii[100];
5515   const int fillsize = (int)sizeof(iiiiii);
5516   memset(iiiiii, 'i', fillsize);
5517 
5518   XML_Parser parser = XML_ParserCreate(NULL);
5519   assert_true(parser != NULL);
5520   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5521 
5522   CharData storage;
5523   CharData_Init(&storage);
5524   XML_SetUserData(parser, &storage);
5525   XML_SetStartElementHandler(parser, start_element_event_handler);
5526 
5527   enum XML_Status status;
5528   // parse the start text
5529   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5530   if (status != XML_STATUS_OK) {
5531     xml_failure(parser);
5532   }
5533   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5534 
5535   // try to parse some 'i', but the token isn't finished
5536   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5537   if (status != XML_STATUS_OK) {
5538     xml_failure(parser);
5539   }
5540   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5541 
5542   // end the <x> token.
5543   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5544   if (status != XML_STATUS_OK) {
5545     xml_failure(parser);
5546   }
5547   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5548 
5549   // now change the heuristic setting and add *no* data
5550   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5551   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5552   status = XML_Parse(parser, "", 0, XML_FALSE);
5553   if (status != XML_STATUS_OK) {
5554     xml_failure(parser);
5555   }
5556   CharData_CheckXMLChars(&storage, XCS("dx"));
5557 
5558   XML_ParserFree(parser);
5559 }
5560 END_TEST
5561 
5562 START_TEST(test_set_bad_reparse_option) {
5563   XML_Parser parser = XML_ParserCreate(NULL);
5564   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5565   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5566   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5567   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5568   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5569   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5570   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5571   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5572   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5573   XML_ParserFree(parser);
5574 }
5575 END_TEST
5576 
/* Allocation statistics maintained by counting_realloc(): the sum of all
   requested sizes, and the largest single requested size. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in the statistics above
   before forwarding the call.  The size is recorded whether or not the
   underlying realloc() succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  if (g_biggestAlloc < size) {
    g_biggestAlloc = size;
  }
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() counterpart: a counted allocation with no previous block. */
static void *
counting_malloc(size_t size) {
  return counting_realloc(NULL, size);
}
5593 
5594 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5595   if (g_chunkSize != 0) {
5596     // this test does not use SINGLE_BYTES, because it depends on very precise
5597     // buffer fills.
5598     return;
5599   }
5600   if (! g_reparseDeferralEnabledDefault) {
5601     return; // this test is irrelevant when the deferral heuristic is disabled.
5602   }
5603 
5604   const int document_length = 65536;
5605   char *const document = (char *)malloc(document_length);
5606 
5607   const XML_Memory_Handling_Suite memfuncs = {
5608       counting_malloc,
5609       counting_realloc,
5610       free,
5611   };
5612 
5613   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5614   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5615   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5616 
5617   for (const int *leading = leading_list; *leading >= 0; leading++) {
5618     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5619       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5620         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5621                     *fillsize);
5622         // start by checking that the test looks reasonably valid
5623         assert_true(*leading + *bigtoken <= document_length);
5624 
5625         // put 'x' everywhere; some will be overwritten by elements.
5626         memset(document, 'x', document_length);
5627         // maybe add an initial tag
5628         if (*leading) {
5629           assert_true(*leading >= 3); // or the test case is invalid
5630           memcpy(document, "<a>", 3);
5631         }
5632         // add the large token
5633         document[*leading + 0] = '<';
5634         document[*leading + 1] = 'b';
5635         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5636         document[*leading + *bigtoken - 1] = '>';
5637 
5638         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5639         const int expected_elem_total = 1 + (*leading ? 1 : 0);
5640 
5641         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5642         assert_true(parser != NULL);
5643 
5644         CharData storage;
5645         CharData_Init(&storage);
5646         XML_SetUserData(parser, &storage);
5647         XML_SetStartElementHandler(parser, start_element_event_handler);
5648 
5649         g_biggestAlloc = 0;
5650         g_totalAlloc = 0;
5651         int offset = 0;
5652         // fill data until the big token is covered (but not necessarily parsed)
5653         while (offset < *leading + *bigtoken) {
5654           assert_true(offset + *fillsize <= document_length);
5655           const enum XML_Status status
5656               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5657           if (status != XML_STATUS_OK) {
5658             xml_failure(parser);
5659           }
5660           offset += *fillsize;
5661         }
5662         // Now, check that we've had a buffer allocation that could fit the
5663         // context bytes and our big token. In order to detect a special case,
5664         // we need to know how many bytes of our big token were included in the
5665         // first push that contained _any_ bytes of the big token:
5666         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5667         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5668           // Special case: we aren't saving any context, and the whole big token
5669           // was covered by a single fill, so Expat may have parsed directly
5670           // from our input pointer, without allocating an internal buffer.
5671         } else if (*leading < XML_CONTEXT_BYTES) {
5672           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5673         } else {
5674           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5675         }
5676         // fill data until the big token is actually parsed
5677         while (storage.count < expected_elem_total) {
5678           const size_t alloc_before = g_totalAlloc;
5679           assert_true(offset + *fillsize <= document_length);
5680           const enum XML_Status status
5681               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5682           if (status != XML_STATUS_OK) {
5683             xml_failure(parser);
5684           }
5685           offset += *fillsize;
5686           // since all the bytes of the big token are already in the buffer,
5687           // the bufsize ceiling should make us finish its parsing without any
5688           // further buffer allocations. We assume that there will be no other
5689           // large allocations in this test.
5690           assert_true(g_totalAlloc - alloc_before < 4096);
5691         }
5692         // test-the-test: was our alloc even called?
5693         assert_true(g_totalAlloc > 0);
5694         // test-the-test: there shouldn't be any extra start elements
5695         assert_true(storage.count == expected_elem_total);
5696 
5697         XML_ParserFree(parser);
5698       }
5699     }
5700   }
5701   free(document);
5702 }
5703 END_TEST
5704 
5705 START_TEST(test_varying_buffer_fills) {
5706   const int KiB = 1024;
5707   const int MiB = 1024 * KiB;
5708   const int document_length = 16 * MiB;
5709   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5710 
5711   if (g_chunkSize != 0) {
5712     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5713   }
5714 
5715   char *const document = (char *)malloc(document_length);
5716   assert_true(document != NULL);
5717   memset(document, 'x', document_length);
5718   document[0] = '<';
5719   document[1] = 't';
5720   memset(&document[2], ' ', big - 2); // a very spacy token
5721   document[big - 1] = '>';
5722 
5723   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5724   // When reparse deferral is enabled, the final (negated) value is the expected
5725   // maximum number of bytes scanned in parse attempts.
5726   const int testcases[][30] = {
5727       {8 * MiB, -8 * MiB},
5728       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5729       // zero-size fills shouldn't trigger the bypass
5730       {4 * MiB, 0, 4 * MiB, -12 * MiB},
5731       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5732       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5733       // try to hit the buffer ceiling only once (at the end)
5734       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5735       // try to hit the same buffer ceiling multiple times
5736       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5737 
5738       // try to hit every ceiling, by always landing 1K shy of the buffer size
5739       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5740        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5741 
5742       // try to avoid every ceiling, by always landing 1B past the buffer size
5743       // the normal 2x heuristic threshold still forces parse attempts.
5744       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5745        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5746        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5747        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5748        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5749        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5750        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5751        -(10 * MiB + 682 * KiB + 7)},
5752       // try to avoid every ceiling again, except on our last fill.
5753       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5754        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5755        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5756        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5757        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5758        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5759        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5760        -(10 * MiB + 682 * KiB + 6)},
5761 
5762       // try to hit ceilings on the way multiple times
5763       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5764        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5765        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
5766        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
5767        // we'll make a parse attempt at every parse call
5768        -(45 * MiB + 12)},
5769   };
5770   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5771   for (int test_i = 0; test_i < testcount; test_i++) {
5772     const int *fillsize = testcases[test_i];
5773     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5774                 fillsize[2], fillsize[3]);
5775     XML_Parser parser = XML_ParserCreate(NULL);
5776     assert_true(parser != NULL);
5777     g_parseAttempts = 0;
5778 
5779     CharData storage;
5780     CharData_Init(&storage);
5781     XML_SetUserData(parser, &storage);
5782     XML_SetStartElementHandler(parser, start_element_event_handler);
5783 
5784     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5785     int scanned_bytes = 0;   // sum of (buffered bytes at each actual parse)
5786     int offset = 0;
5787     while (*fillsize >= 0) {
5788       assert_true(offset + *fillsize <= document_length); // or test is invalid
5789       const unsigned attempts_before = g_parseAttempts;
5790       const enum XML_Status status
5791           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5792       if (status != XML_STATUS_OK) {
5793         xml_failure(parser);
5794       }
5795       offset += *fillsize;
5796       fillsize++;
5797       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5798       worstcase_bytes += offset; // we might've tried to parse all pending bytes
5799       if (g_parseAttempts != attempts_before) {
5800         assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
5801         assert_true(offset <= INT_MAX - scanned_bytes);      // avoid overflow
5802         scanned_bytes += offset; // we *did* try to parse all pending bytes
5803       }
5804     }
5805     assert_true(storage.count == 1); // the big token should've been parsed
5806     assert_true(scanned_bytes > 0);  // test-the-test: does our counter work?
5807     if (g_reparseDeferralEnabledDefault) {
5808       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5809       const int max_bytes_scanned = -*fillsize;
5810       if (scanned_bytes > max_bytes_scanned) {
5811         fprintf(stderr,
5812                 "bytes scanned in parse attempts: actual=%d limit=%d \n",
5813                 scanned_bytes, max_bytes_scanned);
5814         fail("too many bytes scanned in parse attempts");
5815       }
5816       assert_true(scanned_bytes <= worstcase_bytes);
5817     } else {
5818       // heuristic is disabled; every XML_Parse() will have reparsed
5819       assert_true(scanned_bytes == worstcase_bytes);
5820     }
5821 
5822     XML_ParserFree(parser);
5823   }
5824   free(document);
5825 }
5826 END_TEST
5827 
5828 void
5829 make_basic_test_case(Suite *s) {
5830   TCase *tc_basic = tcase_create("basic tests");
5831 
5832   suite_add_tcase(s, tc_basic);
5833   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
5834 
5835   tcase_add_test(tc_basic, test_nul_byte);
5836   tcase_add_test(tc_basic, test_u0000_char);
5837   tcase_add_test(tc_basic, test_siphash_self);
5838   tcase_add_test(tc_basic, test_siphash_spec);
5839   tcase_add_test(tc_basic, test_bom_utf8);
5840   tcase_add_test(tc_basic, test_bom_utf16_be);
5841   tcase_add_test(tc_basic, test_bom_utf16_le);
5842   tcase_add_test(tc_basic, test_nobom_utf16_le);
5843   tcase_add_test(tc_basic, test_hash_collision);
5844   tcase_add_test(tc_basic, test_illegal_utf8);
5845   tcase_add_test(tc_basic, test_utf8_auto_align);
5846   tcase_add_test(tc_basic, test_utf16);
5847   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
5848   tcase_add_test(tc_basic, test_not_utf16);
5849   tcase_add_test(tc_basic, test_bad_encoding);
5850   tcase_add_test(tc_basic, test_latin1_umlauts);
5851   tcase_add_test(tc_basic, test_long_utf8_character);
5852   tcase_add_test(tc_basic, test_long_latin1_attribute);
5853   tcase_add_test(tc_basic, test_long_ascii_attribute);
5854   /* Regression test for SF bug #491986. */
5855   tcase_add_test(tc_basic, test_danish_latin1);
5856   /* Regression test for SF bug #514281. */
5857   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
5858   tcase_add_test(tc_basic, test_french_charref_decimal);
5859   tcase_add_test(tc_basic, test_french_latin1);
5860   tcase_add_test(tc_basic, test_french_utf8);
5861   tcase_add_test(tc_basic, test_utf8_false_rejection);
5862   tcase_add_test(tc_basic, test_line_number_after_parse);
5863   tcase_add_test(tc_basic, test_column_number_after_parse);
5864   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
5865   tcase_add_test(tc_basic, test_line_number_after_error);
5866   tcase_add_test(tc_basic, test_column_number_after_error);
5867   tcase_add_test(tc_basic, test_really_long_lines);
5868   tcase_add_test(tc_basic, test_really_long_encoded_lines);
5869   tcase_add_test(tc_basic, test_end_element_events);
5870   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
5871   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
5872   tcase_add_test(tc_basic, test_xmldecl_misplaced);
5873   tcase_add_test(tc_basic, test_xmldecl_invalid);
5874   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
5875   tcase_add_test(tc_basic, test_xmldecl_missing_value);
5876   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
5877   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
5878   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
5879   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
5880   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
5881   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
5882   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
5883   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
5884   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
5885   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
5886   tcase_add_test(tc_basic,
5887                  test_wfc_undeclared_entity_with_external_subset_standalone);
5888   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
5889   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
5890   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
5891   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
5892   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
5893   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
5894   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
5895   tcase_add_test(tc_basic, test_dtd_attr_handling);
5896   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
5897   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
5898   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
5899   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
5900   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
5901   tcase_add_test(tc_basic, test_good_cdata_ascii);
5902   tcase_add_test(tc_basic, test_good_cdata_utf16);
5903   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
5904   tcase_add_test(tc_basic, test_long_cdata_utf16);
5905   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
5906   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
5907   tcase_add_test(tc_basic, test_bad_cdata);
5908   tcase_add_test(tc_basic, test_bad_cdata_utf16);
5909   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
5910   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
5911   tcase_add_test(tc_basic, test_memory_allocation);
5912   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
5913   tcase_add_test(tc_basic, test_dtd_elements);
5914   tcase_add_test(tc_basic, test_dtd_elements_nesting);
5915   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
5916   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
5917   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
5918   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
5919   tcase_add_test__ifdef_xml_dtd(tc_basic,
5920                                 test_foreign_dtd_without_external_subset);
5921   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
5922   tcase_add_test(tc_basic, test_set_base);
5923   tcase_add_test(tc_basic, test_attributes);
5924   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
5925   tcase_add_test(tc_basic, test_resume_invalid_parse);
5926   tcase_add_test(tc_basic, test_resume_resuspended);
5927   tcase_add_test(tc_basic, test_cdata_default);
5928   tcase_add_test(tc_basic, test_subordinate_reset);
5929   tcase_add_test(tc_basic, test_subordinate_suspend);
5930   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
5931   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
5932   tcase_add_test__ifdef_xml_dtd(tc_basic,
5933                                 test_ext_entity_invalid_suspended_parse);
5934   tcase_add_test(tc_basic, test_explicit_encoding);
5935   tcase_add_test(tc_basic, test_trailing_cr);
5936   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
5937   tcase_add_test(tc_basic, test_trailing_rsqb);
5938   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
5939   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
5940   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
5941   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
5942   tcase_add_test(tc_basic, test_empty_parse);
5943   tcase_add_test(tc_basic, test_get_buffer_1);
5944   tcase_add_test(tc_basic, test_get_buffer_2);
5945 #if XML_CONTEXT_BYTES > 0
5946   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
5947 #endif
5948   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
5949   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
5950   tcase_add_test(tc_basic, test_byte_info_at_end);
5951   tcase_add_test(tc_basic, test_byte_info_at_error);
5952   tcase_add_test(tc_basic, test_byte_info_at_cdata);
5953   tcase_add_test(tc_basic, test_predefined_entities);
5954   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
5955   tcase_add_test(tc_basic, test_not_predefined_entities);
5956   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
5957   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
5958   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
5959   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
5960   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
5961   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
5962   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
5963   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
5964   tcase_add_test(tc_basic, test_bad_public_doctype);
5965   tcase_add_test(tc_basic, test_attribute_enum_value);
5966   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
5967   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
5968   tcase_add_test(tc_basic, test_public_notation_no_sysid);
5969   tcase_add_test(tc_basic, test_nested_groups);
5970   tcase_add_test(tc_basic, test_group_choice);
5971   tcase_add_test(tc_basic, test_standalone_parameter_entity);
5972   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
5973   tcase_add_test__ifdef_xml_dtd(tc_basic,
5974                                 test_recursive_external_parameter_entity);
5975   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
5976   tcase_add_test(tc_basic, test_suspend_xdecl);
5977   tcase_add_test(tc_basic, test_abort_epilog);
5978   tcase_add_test(tc_basic, test_abort_epilog_2);
5979   tcase_add_test(tc_basic, test_suspend_epilog);
5980   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
5981   tcase_add_test(tc_basic, test_unfinished_epilog);
5982   tcase_add_test(tc_basic, test_partial_char_in_epilog);
5983   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
5984   tcase_add_test__ifdef_xml_dtd(tc_basic,
5985                                 test_suspend_resume_internal_entity_issue_629);
5986   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
5987   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
5988   tcase_add_test(tc_basic, test_restart_on_error);
5989   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
5990   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
5991   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
5992   tcase_add_test(tc_basic, test_standalone_internal_entity);
5993   tcase_add_test(tc_basic, test_skipped_external_entity);
5994   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
5995   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
5996   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
5997   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
5998   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
5999   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6000   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6001   tcase_add_test(tc_basic, test_pi_handled_in_default);
6002   tcase_add_test(tc_basic, test_comment_handled_in_default);
6003   tcase_add_test(tc_basic, test_pi_yml);
6004   tcase_add_test(tc_basic, test_pi_xnl);
6005   tcase_add_test(tc_basic, test_pi_xmm);
6006   tcase_add_test(tc_basic, test_utf16_pi);
6007   tcase_add_test(tc_basic, test_utf16_be_pi);
6008   tcase_add_test(tc_basic, test_utf16_be_comment);
6009   tcase_add_test(tc_basic, test_utf16_le_comment);
6010   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6011   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6012   tcase_add_test(tc_basic, test_unknown_encoding_success);
6013   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6014   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6015   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6016   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6017   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6018   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6019   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6020   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6021   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6022   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6023   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6024   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6025   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6026   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6027   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6028   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6029   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6030   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6031   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6032   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6033   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6034   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6035   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6036   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6037   tcase_add_test(tc_basic, test_utf16_attribute);
6038   tcase_add_test(tc_basic, test_utf16_second_attr);
6039   tcase_add_test(tc_basic, test_attr_after_solidus);
6040   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6041   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6042   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6043   tcase_add_test(tc_basic, test_bad_doctype);
6044   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6045   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6046   tcase_add_test(tc_basic, test_bad_doctype_plus);
6047   tcase_add_test(tc_basic, test_bad_doctype_star);
6048   tcase_add_test(tc_basic, test_bad_doctype_query);
6049   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6050   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6051   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6052   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6053   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6054   tcase_add_test(tc_basic, test_short_doctype);
6055   tcase_add_test(tc_basic, test_short_doctype_2);
6056   tcase_add_test(tc_basic, test_short_doctype_3);
6057   tcase_add_test(tc_basic, test_long_doctype);
6058   tcase_add_test(tc_basic, test_bad_entity);
6059   tcase_add_test(tc_basic, test_bad_entity_2);
6060   tcase_add_test(tc_basic, test_bad_entity_3);
6061   tcase_add_test(tc_basic, test_bad_entity_4);
6062   tcase_add_test(tc_basic, test_bad_notation);
6063   tcase_add_test(tc_basic, test_default_doctype_handler);
6064   tcase_add_test(tc_basic, test_empty_element_abort);
6065   tcase_add_test__ifdef_xml_dtd(tc_basic,
6066                                 test_pool_integrity_with_unfinished_attr);
6067   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6068   tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
6069   tcase_add_test(tc_basic, test_set_reparse_deferral);
6070   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6071   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6072   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6073   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6074   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6075   tcase_add_test(tc_basic, test_varying_buffer_fills);
6076 }
6077