xref: /freebsd/contrib/expat/tests/basic_tests.c (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1 /* Tests in the "basic" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22    Licensed under the MIT license:
23 
24    Permission is  hereby granted,  free of charge,  to any  person obtaining
25    a  copy  of  this  software   and  associated  documentation  files  (the
26    "Software"),  to  deal in  the  Software  without restriction,  including
27    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28    distribute, sublicense, and/or sell copies of the Software, and to permit
29    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30    following conditions:
31 
32    The above copyright  notice and this permission notice  shall be included
33    in all copies or substantial portions of the Software.
34 
35    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41    USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43 
44 #if defined(NDEBUG)
45 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47 
48 #include <assert.h>
49 
50 #include <stdio.h>
51 #include <string.h>
52 #include <time.h>
53 
54 #if ! defined(__cplusplus)
55 #  include <stdbool.h>
56 #endif
57 
58 #include "expat_config.h"
59 
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69 
70 static void
71 basic_setup(void) {
72   g_parser = XML_ParserCreate(NULL);
73   if (g_parser == NULL)
74     fail("Parser not created.");
75 }
76 
77 /*
78  * Character & encoding tests.
79  */
80 
81 START_TEST(test_nul_byte) {
82   char text[] = "<doc>\0</doc>";
83 
84   /* test that a NUL byte (in US-ASCII data) is an error */
85   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86       == XML_STATUS_OK)
87     fail("Parser did not report error on NUL-byte.");
88   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89     xml_failure(g_parser);
90 }
91 END_TEST
92 
93 START_TEST(test_u0000_char) {
94   /* test that a NUL byte (in US-ASCII data) is an error */
95   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96                  "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99 
100 START_TEST(test_siphash_self) {
101   if (! sip24_valid())
102     fail("SipHash self-test failed");
103 }
104 END_TEST
105 
106 START_TEST(test_siphash_spec) {
107   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109                          "\x0a\x0b\x0c\x0d\x0e";
110   const size_t len = sizeof(message) - 1;
111   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112   struct siphash state;
113   struct sipkey key;
114 
115   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116                   "\x0a\x0b\x0c\x0d\x0e\x0f");
117   sip24_init(&state, &key);
118 
119   /* Cover spread across calls */
120   sip24_update(&state, message, 4);
121   sip24_update(&state, message + 4, len - 4);
122 
123   /* Cover null length */
124   sip24_update(&state, message, 0);
125 
126   if (sip24_final(&state) != expected)
127     fail("sip24_final failed spec test\n");
128 
129   /* Cover wrapper */
130   if (siphash24(message, len, &key) != expected)
131     fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134 
135 START_TEST(test_bom_utf8) {
136   /* This test is really just making sure we don't core on a UTF-8 BOM. */
137   const char *text = "\357\273\277<e/>";
138 
139   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140       == XML_STATUS_ERROR)
141     xml_failure(g_parser);
142 }
143 END_TEST
144 
145 START_TEST(test_bom_utf16_be) {
146   char text[] = "\376\377\0<\0e\0/\0>";
147 
148   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149       == XML_STATUS_ERROR)
150     xml_failure(g_parser);
151 }
152 END_TEST
153 
154 START_TEST(test_bom_utf16_le) {
155   char text[] = "\377\376<\0e\0/\0>\0";
156 
157   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158       == XML_STATUS_ERROR)
159     xml_failure(g_parser);
160 }
161 END_TEST
162 
163 START_TEST(test_nobom_utf16_le) {
164   char text[] = " \0<\0e\0/\0>\0";
165 
166   if (g_chunkSize == 1) {
167     // TODO: with just the first byte, we can't tell the difference between
168     // UTF-16-LE and UTF-8. Avoid the failure for now.
169     return;
170   }
171 
172   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173       == XML_STATUS_ERROR)
174     xml_failure(g_parser);
175 }
176 END_TEST
177 
178 START_TEST(test_hash_collision) {
179   /* For full coverage of the lookup routine, we need to ensure a
180    * hash collision even though we can only tell that we have one
181    * through breakpoint debugging or coverage statistics.  The
182    * following will cause a hash collision on machines with a 64-bit
183    * long type; others will have to experiment.  The full coverage
184    * tests invoked from qa.sh usually provide a hash collision, but
185    * not always.  This is an attempt to provide insurance.
186    */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188   const char *text
189       = "<doc>\n"
190         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195         "<d8>This triggers the table growth and collides with b2</d8>\n"
196         "</doc>\n";
197 
198   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200       == XML_STATUS_ERROR)
201     xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205 
206 /* Regression test for SF bug #491986. */
207 START_TEST(test_danish_latin1) {
208   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210 #ifdef XML_UNICODE
211   const XML_Char *expected
212       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213 #else
214   const XML_Char *expected
215       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216 #endif
217   run_character_check(text, expected);
218 }
219 END_TEST
220 
221 /* Regression test for SF bug #514281. */
222 START_TEST(test_french_charref_hexidecimal) {
223   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225 #ifdef XML_UNICODE
226   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227 #else
228   const XML_Char *expected
229       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230 #endif
231   run_character_check(text, expected);
232 }
233 END_TEST
234 
235 START_TEST(test_french_charref_decimal) {
236   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238 #ifdef XML_UNICODE
239   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240 #else
241   const XML_Char *expected
242       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243 #endif
244   run_character_check(text, expected);
245 }
246 END_TEST
247 
248 START_TEST(test_french_latin1) {
249   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251 #ifdef XML_UNICODE
252   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254   const XML_Char *expected
255       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257   run_character_check(text, expected);
258 }
259 END_TEST
260 
261 START_TEST(test_french_utf8) {
262   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263                      "<doc>\xC3\xA9</doc>";
264 #ifdef XML_UNICODE
265   const XML_Char *expected = XCS("\x00e9");
266 #else
267   const XML_Char *expected = XCS("\xC3\xA9");
268 #endif
269   run_character_check(text, expected);
270 }
271 END_TEST
272 
273 /* Regression test for SF bug #600479.
274    XXX There should be a test that exercises all legal XML Unicode
275    characters as PCDATA and attribute value content, and XML Name
276    characters as part of element and attribute names.
277 */
278 START_TEST(test_utf8_false_rejection) {
279   const char *text = "<doc>\xEF\xBA\xBF</doc>";
280 #ifdef XML_UNICODE
281   const XML_Char *expected = XCS("\xfebf");
282 #else
283   const XML_Char *expected = XCS("\xEF\xBA\xBF");
284 #endif
285   run_character_check(text, expected);
286 }
287 END_TEST
288 
289 /* Regression test for SF bug #477667.
290    This test assures that any 8-bit character followed by a 7-bit
291    character will not be mistakenly interpreted as a valid UTF-8
292    sequence.
293 */
294 START_TEST(test_illegal_utf8) {
295   char text[100];
296   int i;
297 
298   for (i = 128; i <= 255; ++i) {
299     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301         == XML_STATUS_OK) {
302       snprintf(text, sizeof(text),
303                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304                i);
305       fail(text);
306     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307       xml_failure(g_parser);
308     /* Reset the parser since we use the same parser repeatedly. */
309     XML_ParserReset(g_parser, NULL);
310   }
311 }
312 END_TEST
313 
314 /* Examples, not masks: */
315 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320 
321 START_TEST(test_utf8_auto_align) {
322   struct TestCase {
323     ptrdiff_t expectedMovementInChars;
324     const char *input;
325   };
326 
327   struct TestCase cases[] = {
328       {00, ""},
329 
330       {00, UTF8_LEAD_1},
331 
332       {-1, UTF8_LEAD_2},
333       {00, UTF8_LEAD_2 UTF8_FOLLOW},
334 
335       {-1, UTF8_LEAD_3},
336       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338 
339       {-1, UTF8_LEAD_4},
340       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343   };
344 
345   size_t i = 0;
346   bool success = true;
347   for (; i < sizeof(cases) / sizeof(*cases); i++) {
348     const char *fromLim = cases[i].input + strlen(cases[i].input);
349     const char *const fromLimInitially = fromLim;
350     ptrdiff_t actualMovementInChars;
351 
352     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353 
354     actualMovementInChars = (fromLim - fromLimInitially);
355     if (actualMovementInChars != cases[i].expectedMovementInChars) {
356       size_t j = 0;
357       success = false;
358       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359              ", actually moved by %2d chars: \"",
360              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361              (int)actualMovementInChars);
362       for (; j < strlen(cases[i].input); j++) {
363         printf("\\x%02x", (unsigned char)cases[i].input[j]);
364       }
365       printf("\"\n");
366     }
367   }
368 
369   if (! success) {
370     fail("UTF-8 auto-alignment is not bullet-proof\n");
371   }
372 }
373 END_TEST
374 
375 START_TEST(test_utf16) {
376   /* <?xml version="1.0" encoding="UTF-16"?>
377    *  <doc a='123'>some {A} text</doc>
378    *
379    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380    */
381   char text[]
382       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385         "\000'\000?\000>\000\n"
386         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388         "<\000/\000d\000o\000c\000>";
389 #ifdef XML_UNICODE
390   const XML_Char *expected = XCS("some \xff21 text");
391 #else
392   const XML_Char *expected = XCS("some \357\274\241 text");
393 #endif
394   CharData storage;
395 
396   CharData_Init(&storage);
397   XML_SetUserData(g_parser, &storage);
398   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400       == XML_STATUS_ERROR)
401     xml_failure(g_parser);
402   CharData_CheckXMLChars(&storage, expected);
403 }
404 END_TEST
405 
406 START_TEST(test_utf16_le_epilog_newline) {
407   unsigned int first_chunk_bytes = 17;
408   char text[] = "\xFF\xFE"                  /* BOM */
409                 "<\000e\000/\000>\000"      /* document element */
410                 "\r\000\n\000\r\000\n\000"; /* epilog */
411 
412   if (first_chunk_bytes >= sizeof(text) - 1)
413     fail("bad value of first_chunk_bytes");
414   if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415       == XML_STATUS_ERROR)
416     xml_failure(g_parser);
417   else {
418     enum XML_Status rc;
419     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420                                  sizeof(text) - first_chunk_bytes - 1,
421                                  XML_TRUE);
422     if (rc == XML_STATUS_ERROR)
423       xml_failure(g_parser);
424   }
425 }
426 END_TEST
427 
428 /* Test that an outright lie in the encoding is faulted */
429 START_TEST(test_not_utf16) {
430   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431                      "<doc>Hi</doc>";
432 
433   /* Use a handler to provoke the appropriate code paths */
434   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436                  "UTF-16 declared in UTF-8 not faulted");
437 }
438 END_TEST
439 
440 /* Test that an unknown encoding is rejected */
441 START_TEST(test_bad_encoding) {
442   const char *text = "<doc>Hi</doc>";
443 
444   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445     fail("XML_SetEncoding failed");
446   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447                  "Unknown encoding not faulted");
448 }
449 END_TEST
450 
451 /* Regression test for SF bug #481609, #774028. */
452 START_TEST(test_latin1_umlauts) {
453   const char *text
454       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457 #ifdef XML_UNICODE
458   /* Expected results in UTF-16 */
459   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461 #else
462   /* Expected results in UTF-8 */
463   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465 #endif
466 
467   run_character_check(text, expected);
468   XML_ParserReset(g_parser, NULL);
469   run_attribute_check(text, expected);
470   /* Repeat with a default handler */
471   XML_ParserReset(g_parser, NULL);
472   XML_SetDefaultHandler(g_parser, dummy_default_handler);
473   run_character_check(text, expected);
474   XML_ParserReset(g_parser, NULL);
475   XML_SetDefaultHandler(g_parser, dummy_default_handler);
476   run_attribute_check(text, expected);
477 }
478 END_TEST
479 
480 /* Test that an element name with a 4-byte UTF-8 character is rejected */
481 START_TEST(test_long_utf8_character) {
482   const char *text
483       = "<?xml version='1.0' encoding='utf-8'?>\n"
484         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485         "<do\xf0\x90\x80\x80/>";
486   expect_failure(text, XML_ERROR_INVALID_TOKEN,
487                  "4-byte UTF-8 character in element name not faulted");
488 }
489 END_TEST
490 
491 /* Test that a long latin-1 attribute (too long to convert in one go)
492  * is correctly converted
493  */
494 START_TEST(test_long_latin1_attribute) {
495   const char *text
496       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497         "<doc att='"
498         /* 64 characters per line */
499         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515         /* Last character splits across a buffer boundary */
516         "\xe4'>\n</doc>";
517 
518   const XML_Char *expected =
519       /* 64 characters per line */
520       /* clang-format off */
521         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537   /* clang-format on */
538 #ifdef XML_UNICODE
539                                                   XCS("\x00e4");
540 #else
541                                                   XCS("\xc3\xa4");
542 #endif
543 
544   run_attribute_check(text, expected);
545 }
546 END_TEST
547 
548 /* Test that a long ASCII attribute (too long to convert in one go)
549  * is correctly converted
550  */
551 START_TEST(test_long_ascii_attribute) {
552   const char *text
553       = "<?xml version='1.0' encoding='us-ascii'?>\n"
554         "<doc att='"
555         /* 64 characters per line */
556         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572         "01234'>\n</doc>";
573   const XML_Char *expected =
574       /* 64 characters per line */
575       /* clang-format off */
576         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592         XCS("01234");
593   /* clang-format on */
594 
595   run_attribute_check(text, expected);
596 }
597 END_TEST
598 
599 /* Regression test #1 for SF bug #653180. */
600 START_TEST(test_line_number_after_parse) {
601   const char *text = "<tag>\n"
602                      "\n"
603                      "\n</tag>";
604   XML_Size lineno;
605 
606   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607       == XML_STATUS_ERROR)
608     xml_failure(g_parser);
609   lineno = XML_GetCurrentLineNumber(g_parser);
610   if (lineno != 4) {
611     char buffer[100];
612     snprintf(buffer, sizeof(buffer),
613              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614     fail(buffer);
615   }
616 }
617 END_TEST
618 
619 /* Regression test #2 for SF bug #653180. */
620 START_TEST(test_column_number_after_parse) {
621   const char *text = "<tag></tag>";
622   XML_Size colno;
623 
624   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625       == XML_STATUS_ERROR)
626     xml_failure(g_parser);
627   colno = XML_GetCurrentColumnNumber(g_parser);
628   if (colno != 11) {
629     char buffer[100];
630     snprintf(buffer, sizeof(buffer),
631              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632     fail(buffer);
633   }
634 }
635 END_TEST
636 
637 /* Regression test #3 for SF bug #653180. */
638 START_TEST(test_line_and_column_numbers_inside_handlers) {
639   const char *text = "<a>\n"      /* Unix end-of-line */
640                      "  <b>\r\n"  /* Windows end-of-line */
641                      "    <c/>\r" /* Mac OS end-of-line */
642                      "  </b>\n"
643                      "  <d>\n"
644                      "    <f/>\n"
645                      "  </d>\n"
646                      "</a>";
647   const StructDataEntry expected[]
648       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
651          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
653   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654   StructData storage;
655 
656   StructData_Init(&storage);
657   XML_SetUserData(g_parser, &storage);
658   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661       == XML_STATUS_ERROR)
662     xml_failure(g_parser);
663 
664   StructData_CheckItems(&storage, expected, expected_count);
665   StructData_Dispose(&storage);
666 }
667 END_TEST
668 
669 /* Regression test #4 for SF bug #653180. */
670 START_TEST(test_line_number_after_error) {
671   const char *text = "<a>\n"
672                      "  <b>\n"
673                      "  </a>"; /* missing </b> */
674   XML_Size lineno;
675   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676       != XML_STATUS_ERROR)
677     fail("Expected a parse error");
678 
679   lineno = XML_GetCurrentLineNumber(g_parser);
680   if (lineno != 3) {
681     char buffer[100];
682     snprintf(buffer, sizeof(buffer),
683              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684     fail(buffer);
685   }
686 }
687 END_TEST
688 
689 /* Regression test #5 for SF bug #653180. */
690 START_TEST(test_column_number_after_error) {
691   const char *text = "<a>\n"
692                      "  <b>\n"
693                      "  </a>"; /* missing </b> */
694   XML_Size colno;
695   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696       != XML_STATUS_ERROR)
697     fail("Expected a parse error");
698 
699   colno = XML_GetCurrentColumnNumber(g_parser);
700   if (colno != 4) {
701     char buffer[100];
702     snprintf(buffer, sizeof(buffer),
703              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704     fail(buffer);
705   }
706 }
707 END_TEST
708 
709 /* Regression test for SF bug #478332. */
710 START_TEST(test_really_long_lines) {
711   /* This parses an input line longer than INIT_DATA_BUF_SIZE
712      characters long (defined to be 1024 in xmlparse.c).  We take a
713      really cheesy approach to building the input buffer, because
714      this avoids writing bugs in buffer-filling code.
715   */
716   const char *text
717       = "<e>"
718         /* 64 chars */
719         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720         /* until we have at least 1024 characters on the line: */
721         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737         "</e>";
738   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739       == XML_STATUS_ERROR)
740     xml_failure(g_parser);
741 }
742 END_TEST
743 
744 /* Test cdata processing across a buffer boundary */
745 START_TEST(test_really_long_encoded_lines) {
746   /* As above, except that we want to provoke an output buffer
747    * overflow with a non-trivial encoding.  For this we need to pass
748    * the whole cdata in one go, not byte-by-byte.
749    */
750   void *buffer;
751   const char *text
752       = "<?xml version='1.0' encoding='iso-8859-1'?>"
753         "<e>"
754         /* 64 chars */
755         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756         /* until we have at least 1024 characters on the line: */
757         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773         "</e>";
774   int parse_len = (int)strlen(text);
775 
776   /* Need a cdata handler to provoke the code path we want to test */
777   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778   buffer = XML_GetBuffer(g_parser, parse_len);
779   if (buffer == NULL)
780     fail("Could not allocate parse buffer");
781   assert(buffer != NULL);
782   memcpy(buffer, text, parse_len);
783   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784     xml_failure(g_parser);
785 }
786 END_TEST
787 
788 /*
789  * Element event tests.
790  */
791 
792 START_TEST(test_end_element_events) {
793   const char *text = "<a><b><c/></b><d><f/></d></a>";
794   const XML_Char *expected = XCS("/c/b/f/d/a");
795   CharData storage;
796 
797   CharData_Init(&storage);
798   XML_SetUserData(g_parser, &storage);
799   XML_SetEndElementHandler(g_parser, end_element_event_handler);
800   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801       == XML_STATUS_ERROR)
802     xml_failure(g_parser);
803   CharData_CheckXMLChars(&storage, expected);
804 }
805 END_TEST
806 
807 /*
808  * Attribute tests.
809  */
810 
811 /* Helper used by the following tests; this checks any "attr" and "refs"
812    attributes to make sure whitespace has been normalized.
813 
814    Return true if whitespace has been normalized in a string, using
815    the rules for attribute value normalization.  The 'is_cdata' flag
816    is needed since CDATA attributes don't need to have multiple
817    whitespace characters collapsed to a single space, while other
818    attribute data types do.  (Section 3.3.3 of the recommendation.)
819 */
820 static int
821 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822   int blanks = 0;
823   int at_start = 1;
824   while (*s) {
825     if (*s == XCS(' '))
826       ++blanks;
827     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828       return 0;
829     else {
830       if (at_start) {
831         at_start = 0;
832         if (blanks && ! is_cdata)
833           /* illegal leading blanks */
834           return 0;
835       } else if (blanks > 1 && ! is_cdata)
836         return 0;
837       blanks = 0;
838     }
839     ++s;
840   }
841   if (blanks && ! is_cdata)
842     return 0;
843   return 1;
844 }
845 
846 /* Check the attribute whitespace checker: */
847 START_TEST(test_helper_is_whitespace_normalized) {
848   assert(is_whitespace_normalized(XCS("abc"), 0));
849   assert(is_whitespace_normalized(XCS("abc"), 1));
850   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
855   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
856   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858   assert(! is_whitespace_normalized(XCS(" "), 0));
859   assert(is_whitespace_normalized(XCS(" "), 1));
860   assert(! is_whitespace_normalized(XCS("\t"), 0));
861   assert(! is_whitespace_normalized(XCS("\t"), 1));
862   assert(! is_whitespace_normalized(XCS("\n"), 0));
863   assert(! is_whitespace_normalized(XCS("\n"), 1));
864   assert(! is_whitespace_normalized(XCS("\r"), 0));
865   assert(! is_whitespace_normalized(XCS("\r"), 1));
866   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867 }
868 END_TEST
869 
870 static void XMLCALL
871 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872                                           const XML_Char **atts) {
873   int i;
874   UNUSED_P(userData);
875   UNUSED_P(name);
876   for (i = 0; atts[i] != NULL; i += 2) {
877     const XML_Char *attrname = atts[i];
878     const XML_Char *value = atts[i + 1];
879     if (xcstrcmp(XCS("attr"), attrname) == 0
880         || xcstrcmp(XCS("ents"), attrname) == 0
881         || xcstrcmp(XCS("refs"), attrname) == 0) {
882       if (! is_whitespace_normalized(value, 0)) {
883         char buffer[256];
884         snprintf(buffer, sizeof(buffer),
885                  "attribute value not normalized: %" XML_FMT_STR
886                  "='%" XML_FMT_STR "'",
887                  attrname, value);
888         fail(buffer);
889       }
890     }
891   }
892 }
893 
894 START_TEST(test_attr_whitespace_normalization) {
895   const char *text
896       = "<!DOCTYPE doc [\n"
897         "  <!ATTLIST doc\n"
898         "            attr NMTOKENS #REQUIRED\n"
899         "            ents ENTITIES #REQUIRED\n"
900         "            refs IDREFS   #REQUIRED>\n"
901         "]>\n"
902         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
903         "     ents=' ent-1   \t\r\n"
904         "            ent-2  ' >\n"
905         "  <e id='id-1'/>\n"
906         "  <e id='id-2'/>\n"
907         "</doc>";
908 
909   XML_SetStartElementHandler(g_parser,
910                              check_attr_contains_normalized_whitespace);
911   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912       == XML_STATUS_ERROR)
913     xml_failure(g_parser);
914 }
915 END_TEST
916 
917 /*
918  * XML declaration tests.
919  */
920 
921 START_TEST(test_xmldecl_misplaced) {
922   expect_failure("\n"
923                  "<?xml version='1.0'?>\n"
924                  "<a/>",
925                  XML_ERROR_MISPLACED_XML_PI,
926                  "failed to report misplaced XML declaration");
927 }
928 END_TEST
929 
930 START_TEST(test_xmldecl_invalid) {
931   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932                  "Failed to report invalid XML declaration");
933 }
934 END_TEST
935 
936 START_TEST(test_xmldecl_missing_attr) {
937   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938                  "Failed to report missing XML declaration attribute");
939 }
940 END_TEST
941 
942 START_TEST(test_xmldecl_missing_value) {
943   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944                  "<doc/>",
945                  XML_ERROR_XML_DECL,
946                  "Failed to report missing attribute value");
947 }
948 END_TEST
949 
950 /* Regression test for SF bug #584832. */
951 START_TEST(test_unknown_encoding_internal_entity) {
952   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954                      "<test a='&foo;'/>";
955 
956   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958       == XML_STATUS_ERROR)
959     xml_failure(g_parser);
960 }
961 END_TEST
962 
963 /* Test unrecognised encoding handler */
964 START_TEST(test_unrecognised_encoding_internal_entity) {
965   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967                      "<test a='&foo;'/>";
968 
969   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971       != XML_STATUS_ERROR)
972     fail("Unrecognised encoding not rejected");
973 }
974 END_TEST
975 
976 /* Regression test for SF bug #620106. */
977 START_TEST(test_ext_entity_set_encoding) {
978   const char *text = "<!DOCTYPE doc [\n"
979                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980                      "]>\n"
981                      "<doc>&en;</doc>";
982   ExtTest test_data
983       = {/* This text says it's an unsupported encoding, but it's really
984             UTF-8, which we tell Expat using XML_SetEncoding().
985          */
986          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987 #ifdef XML_UNICODE
988   const XML_Char *expected = XCS("\x00e9");
989 #else
990   const XML_Char *expected = XCS("\xc3\xa9");
991 #endif
992 
993   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994   run_ext_character_check(text, &test_data, expected);
995 }
996 END_TEST
997 
998 /* Test external entities with no handler */
999 START_TEST(test_ext_entity_no_handler) {
1000   const char *text = "<!DOCTYPE doc [\n"
1001                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002                      "]>\n"
1003                      "<doc>&en;</doc>";
1004 
1005   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006   run_character_check(text, XCS(""));
1007 }
1008 END_TEST
1009 
1010 /* Test UTF-8 BOM is accepted */
1011 START_TEST(test_ext_entity_set_bom) {
1012   const char *text = "<!DOCTYPE doc [\n"
1013                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014                      "]>\n"
1015                      "<doc>&en;</doc>";
1016   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017                        "<?xml encoding='iso-8859-3'?>"
1018                        "\xC3\xA9",
1019                        XCS("utf-8"), NULL};
1020 #ifdef XML_UNICODE
1021   const XML_Char *expected = XCS("\x00e9");
1022 #else
1023   const XML_Char *expected = XCS("\xc3\xa9");
1024 #endif
1025 
1026   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027   run_ext_character_check(text, &test_data, expected);
1028 }
1029 END_TEST
1030 
1031 /* Test that bad encodings are faulted */
1032 START_TEST(test_ext_entity_bad_encoding) {
1033   const char *text = "<!DOCTYPE doc [\n"
1034                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035                      "]>\n"
1036                      "<doc>&en;</doc>";
1037   ExtFaults fault
1038       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040 
1041   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042   XML_SetUserData(g_parser, &fault);
1043   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044                  "Bad encoding should not have been accepted");
1045 }
1046 END_TEST
1047 
1048 /* Try handing an invalid encoding to an external entity parser */
1049 START_TEST(test_ext_entity_bad_encoding_2) {
1050   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052                      "<doc>&entity;</doc>";
1053   ExtFaults fault
1054       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056 
1057   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059   XML_SetUserData(g_parser, &fault);
1060   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061                  "Bad encoding not faulted in external entity handler");
1062 }
1063 END_TEST
1064 
1065 /* Test that no error is reported for unknown entities if we don't
1066    read an external subset.  This was fixed in Expat 1.95.5.
1067 */
1068 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070                      "<doc>&entity;</doc>";
1071 
1072   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073       == XML_STATUS_ERROR)
1074     xml_failure(g_parser);
1075 }
1076 END_TEST
1077 
1078 /* Test that an error is reported for unknown entities if we don't
1079    have an external subset.
1080 */
1081 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083                  "Parser did not report undefined entity w/out a DTD.");
1084 }
1085 END_TEST
1086 
1087 /* Test that an error is reported for unknown entities if we don't
1088    read an external subset, but have been declared standalone.
1089 */
1090 START_TEST(test_wfc_undeclared_entity_standalone) {
1091   const char *text
1092       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094         "<doc>&entity;</doc>";
1095 
1096   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097                  "Parser did not report undefined entity (standalone).");
1098 }
1099 END_TEST
1100 
1101 /* Test that an error is reported for unknown entities if we have read
1102    an external subset, and standalone is true.
1103 */
1104 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105   const char *text
1106       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108         "<doc>&entity;</doc>";
1109   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110 
1111   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112   XML_SetUserData(g_parser, &test_data);
1113   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115                  "Parser did not report undefined entity (external DTD).");
1116 }
1117 END_TEST
1118 
1119 /* Test that external entity handling is not done if the parsing flag
1120  * is set to UNLESS_STANDALONE
1121  */
1122 START_TEST(test_entity_with_external_subset_unless_standalone) {
1123   const char *text
1124       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126         "<doc>&entity;</doc>";
1127   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128 
1129   XML_SetParamEntityParsing(g_parser,
1130                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131   XML_SetUserData(g_parser, &test_data);
1132   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134                  "Parser did not report undefined entity");
1135 }
1136 END_TEST
1137 
1138 /* Test that no error is reported for unknown entities if we have read
1139    an external subset, and standalone is false.
1140 */
1141 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144                      "<doc>&entity;</doc>";
1145   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146 
1147   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149   run_ext_character_check(text, &test_data, XCS(""));
1150 }
1151 END_TEST
1152 
1153 /* Test that an error is reported if our NotStandalone handler fails */
1154 START_TEST(test_not_standalone_handler_reject) {
1155   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157                      "<doc>&entity;</doc>";
1158   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159 
1160   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161   XML_SetUserData(g_parser, &test_data);
1162   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165                  "NotStandalone handler failed to reject");
1166 
1167   /* Try again but without external entity handling */
1168   XML_ParserReset(g_parser, NULL);
1169   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171                  "NotStandalone handler failed to reject");
1172 }
1173 END_TEST
1174 
1175 /* Test that no error is reported if our NotStandalone handler succeeds */
1176 START_TEST(test_not_standalone_handler_accept) {
1177   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179                      "<doc>&entity;</doc>";
1180   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181 
1182   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185   run_ext_character_check(text, &test_data, XCS(""));
1186 
1187   /* Repeat without the external entity handler */
1188   XML_ParserReset(g_parser, NULL);
1189   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190   run_character_check(text, XCS(""));
1191 }
1192 END_TEST
1193 
1194 START_TEST(test_wfc_no_recursive_entity_refs) {
1195   const char *text = "<!DOCTYPE doc [\n"
1196                      "  <!ENTITY entity '&#38;entity;'>\n"
1197                      "]>\n"
1198                      "<doc>&entity;</doc>";
1199 
1200   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201                  "Parser did not report recursive entity reference.");
1202 }
1203 END_TEST
1204 
1205 START_TEST(test_recursive_external_parameter_entity_2) {
1206   struct TestCase {
1207     const char *doc;
1208     enum XML_Status expectedStatus;
1209   };
1210 
1211   struct TestCase cases[] = {
1212       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213       {"<!ENTITY % p1 '%p1;'>"
1214        "<!ENTITY % p1 'first declaration wins'>",
1215        XML_STATUS_ERROR},
1216       {"<!ENTITY % p1 'first declaration wins'>"
1217        "<!ENTITY % p1 '%p1;'>",
1218        XML_STATUS_OK},
1219       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1220   };
1221 
1222   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223     const char *const doc = cases[i].doc;
1224     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225     set_subtest("%s", doc);
1226 
1227     XML_Parser parser = XML_ParserCreate(NULL);
1228     assert_true(parser != NULL);
1229 
1230     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1231     assert_true(ext_parser != NULL);
1232 
1233     const enum XML_Status actualStatus
1234         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235 
1236     assert_true(actualStatus == expectedStatus);
1237     if (actualStatus != XML_STATUS_OK) {
1238       assert_true(XML_GetErrorCode(ext_parser)
1239                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1240     }
1241 
1242     XML_ParserFree(ext_parser);
1243     XML_ParserFree(parser);
1244   }
1245 }
1246 END_TEST
1247 
1248 /* Test incomplete external entities are faulted */
1249 START_TEST(test_ext_entity_invalid_parse) {
1250   const char *text = "<!DOCTYPE doc [\n"
1251                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1252                      "]>\n"
1253                      "<doc>&en;</doc>";
1254   const ExtFaults faults[]
1255       = {{"<", "Incomplete element declaration not faulted", NULL,
1256           XML_ERROR_UNCLOSED_TOKEN},
1257          {"<\xe2\x82", /* First two bytes of a three-byte char */
1258           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1259          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1260           XML_ERROR_PARTIAL_CHAR},
1261          {NULL, NULL, NULL, XML_ERROR_NONE}};
1262   const ExtFaults *fault = faults;
1263 
1264   for (; fault->parse_text != NULL; fault++) {
1265     set_subtest("\"%s\"", fault->parse_text);
1266     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1267     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1268     XML_SetUserData(g_parser, (void *)fault);
1269     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1270                    "Parser did not report external entity error");
1271     XML_ParserReset(g_parser, NULL);
1272   }
1273 }
1274 END_TEST
1275 
1276 /* Regression test for SF bug #483514. */
1277 START_TEST(test_dtd_default_handling) {
1278   const char *text = "<!DOCTYPE doc [\n"
1279                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1280                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1281                      "<!ELEMENT doc EMPTY>\n"
1282                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1283                      "<?pi in dtd?>\n"
1284                      "<!--comment in dtd-->\n"
1285                      "]><doc/>";
1286 
1287   XML_SetDefaultHandler(g_parser, accumulate_characters);
1288   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1289   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1290   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1291   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1292   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1293   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1294   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1295   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1296   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1297   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1298   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1299 }
1300 END_TEST
1301 
1302 /* Test handling of attribute declarations */
1303 START_TEST(test_dtd_attr_handling) {
1304   const char *prolog = "<!DOCTYPE doc [\n"
1305                        "<!ELEMENT doc EMPTY>\n";
1306   AttTest attr_data[]
1307       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1308           "]>"
1309           "<doc a='two'/>",
1310           XCS("doc"), XCS("a"),
1311           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1312           NULL, XML_TRUE},
1313          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1314           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1315           "]>"
1316           "<doc/>",
1317           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1318          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1319           "]>"
1320           "<doc/>",
1321           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1322          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1323           "]>"
1324           "<doc/>",
1325           XCS("doc"), XCS("a"), XCS("CDATA"),
1326 #ifdef XML_UNICODE
1327           XCS("\x06f2"),
1328 #else
1329           XCS("\xdb\xb2"),
1330 #endif
1331           XML_FALSE},
1332          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1333   AttTest *test;
1334 
1335   for (test = attr_data; test->definition != NULL; test++) {
1336     set_subtest("%s", test->definition);
1337     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1338     XML_SetUserData(g_parser, test);
1339     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1340                                 XML_FALSE)
1341         == XML_STATUS_ERROR)
1342       xml_failure(g_parser);
1343     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1344                                 (int)strlen(test->definition), XML_TRUE)
1345         == XML_STATUS_ERROR)
1346       xml_failure(g_parser);
1347     XML_ParserReset(g_parser, NULL);
1348   }
1349 }
1350 END_TEST
1351 
1352 /* See related SF bug #673791.
1353    When namespace processing is enabled, setting the namespace URI for
1354    a prefix is not allowed; this test ensures that it *is* allowed
1355    when namespace processing is not enabled.
1356    (See Namespaces in XML, section 2.)
1357 */
1358 START_TEST(test_empty_ns_without_namespaces) {
1359   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1360                      "  <e xmlns:prefix=''/>\n"
1361                      "</doc>";
1362 
1363   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1364       == XML_STATUS_ERROR)
1365     xml_failure(g_parser);
1366 }
1367 END_TEST
1368 
1369 /* Regression test for SF bug #824420.
1370    Checks that an xmlns:prefix attribute set in an attribute's default
1371    value isn't misinterpreted.
1372 */
1373 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1374   const char *text = "<!DOCTYPE e:element [\n"
1375                      "  <!ATTLIST e:element\n"
1376                      "    xmlns:e CDATA 'http://example.org/'>\n"
1377                      "      ]>\n"
1378                      "<e:element/>";
1379 
1380   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1381       == XML_STATUS_ERROR)
1382     xml_failure(g_parser);
1383 }
1384 END_TEST
1385 
1386 /* Regression test for SF bug #1515266: missing check of stopped
1387    parser in doContext() 'for' loop. */
1388 START_TEST(test_stop_parser_between_char_data_calls) {
1389   /* The sample data must be big enough that there are two calls to
1390      the character data handler from within the inner "for" loop of
1391      the XML_TOK_DATA_CHARS case in doContent(), and the character
1392      handler must stop the parser and clear the character data
1393      handler.
1394   */
1395   const char *text = long_character_data_text;
1396 
1397   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1398   g_resumable = XML_FALSE;
1399   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1400       != XML_STATUS_ERROR)
1401     xml_failure(g_parser);
1402   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1403     xml_failure(g_parser);
1404 }
1405 END_TEST
1406 
1407 /* Regression test for SF bug #1515266: missing check of stopped
1408    parser in doContext() 'for' loop. */
1409 START_TEST(test_suspend_parser_between_char_data_calls) {
1410   /* The sample data must be big enough that there are two calls to
1411      the character data handler from within the inner "for" loop of
1412      the XML_TOK_DATA_CHARS case in doContent(), and the character
1413      handler must stop the parser and clear the character data
1414      handler.
1415   */
1416   const char *text = long_character_data_text;
1417 
1418   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1419   g_resumable = XML_TRUE;
1420   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1421       != XML_STATUS_SUSPENDED)
1422     xml_failure(g_parser);
1423   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1424     xml_failure(g_parser);
1425   /* Try parsing directly */
1426   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1427       != XML_STATUS_ERROR)
1428     fail("Attempt to continue parse while suspended not faulted");
1429   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1430     fail("Suspended parse not faulted with correct error");
1431 }
1432 END_TEST
1433 
1434 /* Test repeated calls to XML_StopParser are handled correctly */
1435 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1436   const char *text = long_character_data_text;
1437 
1438   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1439   g_resumable = XML_FALSE;
1440   g_abortable = XML_FALSE;
1441   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1442       != XML_STATUS_ERROR)
1443     fail("Failed to double-stop parser");
1444 
1445   XML_ParserReset(g_parser, NULL);
1446   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1447   g_resumable = XML_TRUE;
1448   g_abortable = XML_FALSE;
1449   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1450       != XML_STATUS_SUSPENDED)
1451     fail("Failed to double-suspend parser");
1452 
1453   XML_ParserReset(g_parser, NULL);
1454   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1455   g_resumable = XML_TRUE;
1456   g_abortable = XML_TRUE;
1457   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1458       != XML_STATUS_ERROR)
1459     fail("Failed to suspend-abort parser");
1460 }
1461 END_TEST
1462 
1463 START_TEST(test_good_cdata_ascii) {
1464   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1465   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1466 
1467   CharData storage;
1468   CharData_Init(&storage);
1469   XML_SetUserData(g_parser, &storage);
1470   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1471   /* Add start and end handlers for coverage */
1472   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1473   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1474 
1475   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1476       == XML_STATUS_ERROR)
1477     xml_failure(g_parser);
1478   CharData_CheckXMLChars(&storage, expected);
1479 
1480   /* Try again, this time with a default handler */
1481   XML_ParserReset(g_parser, NULL);
1482   CharData_Init(&storage);
1483   XML_SetUserData(g_parser, &storage);
1484   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1485   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1486 
1487   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1488       == XML_STATUS_ERROR)
1489     xml_failure(g_parser);
1490   CharData_CheckXMLChars(&storage, expected);
1491 }
1492 END_TEST
1493 
1494 START_TEST(test_good_cdata_utf16) {
1495   /* Test data is:
1496    *   <?xml version='1.0' encoding='utf-16'?>
1497    *   <a><![CDATA[hello]]></a>
1498    */
1499   const char text[]
1500       = "\0<\0?\0x\0m\0l\0"
1501         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1502         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1503         "1\0"
1504         "6\0'"
1505         "\0?\0>\0\n"
1506         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1507   const XML_Char *expected = XCS("hello");
1508 
1509   CharData storage;
1510   CharData_Init(&storage);
1511   XML_SetUserData(g_parser, &storage);
1512   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1513 
1514   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1515       == XML_STATUS_ERROR)
1516     xml_failure(g_parser);
1517   CharData_CheckXMLChars(&storage, expected);
1518 }
1519 END_TEST
1520 
1521 START_TEST(test_good_cdata_utf16_le) {
1522   /* Test data is:
1523    *   <?xml version='1.0' encoding='utf-16'?>
1524    *   <a><![CDATA[hello]]></a>
1525    */
1526   const char text[]
1527       = "<\0?\0x\0m\0l\0"
1528         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1529         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1530         "1\0"
1531         "6\0'"
1532         "\0?\0>\0\n"
1533         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1534   const XML_Char *expected = XCS("hello");
1535 
1536   CharData storage;
1537   CharData_Init(&storage);
1538   XML_SetUserData(g_parser, &storage);
1539   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1540 
1541   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1542       == XML_STATUS_ERROR)
1543     xml_failure(g_parser);
1544   CharData_CheckXMLChars(&storage, expected);
1545 }
1546 END_TEST
1547 
1548 /* Test UTF16 conversion of a long cdata string */
1549 
1550 /* 16 characters: handy macro to reduce visual clutter */
1551 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1552 
1553 START_TEST(test_long_cdata_utf16) {
1554   /* Test data is:
1555    * <?xlm version='1.0' encoding='utf-16'?>
1556    * <a><![CDATA[
1557    * ABCDEFGHIJKLMNOP
1558    * ]]></a>
1559    */
1560   const char text[]
1561       = "\0<\0?\0x\0m\0l\0 "
1562         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1563         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1564         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1565       /* 64 characters per line */
1566       /* clang-format off */
1567         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1568         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1569         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1570         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1571         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1572         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1573         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1574         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1575         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1576         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1577         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1578         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1579         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1580         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1581         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1582         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1583         A_TO_P_IN_UTF16
1584         /* clang-format on */
1585         "\0]\0]\0>\0<\0/\0a\0>";
1586   const XML_Char *expected =
1587       /* clang-format off */
1588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604         XCS("ABCDEFGHIJKLMNOP");
1605   /* clang-format on */
1606   CharData storage;
1607   void *buffer;
1608 
1609   CharData_Init(&storage);
1610   XML_SetUserData(g_parser, &storage);
1611   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1613   if (buffer == NULL)
1614     fail("Could not allocate parse buffer");
1615   assert(buffer != NULL);
1616   memcpy(buffer, text, sizeof(text) - 1);
1617   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1618     xml_failure(g_parser);
1619   CharData_CheckXMLChars(&storage, expected);
1620 }
1621 END_TEST
1622 
1623 /* Test handling of multiple unit UTF-16 characters */
1624 START_TEST(test_multichar_cdata_utf16) {
1625   /* Test data is:
1626    *   <?xml version='1.0' encoding='utf-16'?>
1627    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1628    *
1629    * where {MINIM} is U+1d15e (a minim or half-note)
1630    *   UTF-16: 0xd834 0xdd5e
1631    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1632    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1633    *   UTF-16: 0xd834 0xdd5f
1634    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1635    */
1636   const char text[] = "\0<\0?\0x\0m\0l\0"
1637                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1638                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1639                       "1\0"
1640                       "6\0'"
1641                       "\0?\0>\0\n"
1642                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1643                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1644                       "\0]\0]\0>\0<\0/\0a\0>";
1645 #ifdef XML_UNICODE
1646   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1647 #else
1648   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1649 #endif
1650   CharData storage;
1651 
1652   CharData_Init(&storage);
1653   XML_SetUserData(g_parser, &storage);
1654   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1655 
1656   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1657       == XML_STATUS_ERROR)
1658     xml_failure(g_parser);
1659   CharData_CheckXMLChars(&storage, expected);
1660 }
1661 END_TEST
1662 
1663 /* Test that an element name with a UTF-16 surrogate pair is rejected */
1664 START_TEST(test_utf16_bad_surrogate_pair) {
1665   /* Test data is:
1666    *   <?xml version='1.0' encoding='utf-16'?>
1667    *   <a><![CDATA[{BADLINB}]]></a>
1668    *
1669    * where {BADLINB} is U+10000 (the first Linear B character)
1670    * with the UTF-16 surrogate pair in the wrong order, i.e.
1671    *   0xdc00 0xd800
1672    */
1673   const char text[] = "\0<\0?\0x\0m\0l\0"
1674                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1675                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1676                       "1\0"
1677                       "6\0'"
1678                       "\0?\0>\0\n"
1679                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1680                       "\xdc\x00\xd8\x00"
1681                       "\0]\0]\0>\0<\0/\0a\0>";
1682 
1683   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1684       != XML_STATUS_ERROR)
1685     fail("Reversed UTF-16 surrogate pair not faulted");
1686   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1687     xml_failure(g_parser);
1688 }
1689 END_TEST
1690 
1691 START_TEST(test_bad_cdata) {
1692   struct CaseData {
1693     const char *text;
1694     enum XML_Error expectedError;
1695   };
1696 
1697   struct CaseData cases[]
1698       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1699          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1700          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1701          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1702          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1703          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1704          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1705          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1706 
1707          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1708          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710 
1711          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1712          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1713          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1715          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1716          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1717          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1718 
1719          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1720          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1722 
1723   size_t i = 0;
1724   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1725     set_subtest("%s", cases[i].text);
1726     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1727         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1728     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1729 
1730     assert(actualStatus == XML_STATUS_ERROR);
1731 
1732     if (actualError != cases[i].expectedError) {
1733       char message[100];
1734       snprintf(message, sizeof(message),
1735                "Expected error %d but got error %d for case %u: \"%s\"\n",
1736                cases[i].expectedError, actualError, (unsigned int)i + 1,
1737                cases[i].text);
1738       fail(message);
1739     }
1740 
1741     XML_ParserReset(g_parser, NULL);
1742   }
1743 }
1744 END_TEST
1745 
1746 /* Test failures in UTF-16 CDATA */
1747 START_TEST(test_bad_cdata_utf16) {
1748   struct CaseData {
1749     size_t text_bytes;
1750     const char *text;
1751     enum XML_Error expected_error;
1752   };
1753 
1754   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1755                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1756                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1757                         "1\0"
1758                         "6\0'"
1759                         "\0?\0>\0\n"
1760                         "\0<\0a\0>";
1761   struct CaseData cases[] = {
1762       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1763       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1764       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1765       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1766       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1767       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1768       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1769       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1770       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1771       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1772       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1773       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1774       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1775       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1776       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1777       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1778       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1779       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1780       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782       /* Now add a four-byte UTF-16 character */
1783       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1784        XML_ERROR_UNCLOSED_CDATA_SECTION},
1785       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1786       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1787        XML_ERROR_PARTIAL_CHAR},
1788       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1789        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1790   size_t i;
1791 
1792   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1793     set_subtest("case %lu", (long unsigned)(i + 1));
1794     enum XML_Status actual_status;
1795     enum XML_Error actual_error;
1796 
1797     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1798                                 XML_FALSE)
1799         == XML_STATUS_ERROR)
1800       xml_failure(g_parser);
1801     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1802                                             (int)cases[i].text_bytes, XML_TRUE);
1803     assert(actual_status == XML_STATUS_ERROR);
1804     actual_error = XML_GetErrorCode(g_parser);
1805     if (actual_error != cases[i].expected_error) {
1806       char message[1024];
1807 
1808       snprintf(message, sizeof(message),
1809                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1810                ") for case %lu\n",
1811                cases[i].expected_error,
1812                XML_ErrorString(cases[i].expected_error), actual_error,
1813                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1814       fail(message);
1815     }
1816     XML_ParserReset(g_parser, NULL);
1817   }
1818 }
1819 END_TEST
1820 
1821 /* Test stopping the parser in cdata handler */
1822 START_TEST(test_stop_parser_between_cdata_calls) {
1823   const char *text = long_cdata_text;
1824 
1825   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1826   g_resumable = XML_FALSE;
1827   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1828 }
1829 END_TEST
1830 
1831 /* Test suspending the parser in cdata handler */
1832 START_TEST(test_suspend_parser_between_cdata_calls) {
1833   const char *text = long_cdata_text;
1834   enum XML_Status result;
1835 
1836   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1837   g_resumable = XML_TRUE;
1838   result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1839   if (result != XML_STATUS_SUSPENDED) {
1840     if (result == XML_STATUS_ERROR)
1841       xml_failure(g_parser);
1842     fail("Parse not suspended in CDATA handler");
1843   }
1844   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1845     xml_failure(g_parser);
1846 }
1847 END_TEST
1848 
1849 /* Test memory allocation functions */
1850 START_TEST(test_memory_allocation) {
1851   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1852   char *p;
1853 
1854   if (buffer == NULL) {
1855     fail("Allocation failed");
1856   } else {
1857     /* Try writing to memory; some OSes try to cheat! */
1858     buffer[0] = 'T';
1859     buffer[1] = 'E';
1860     buffer[2] = 'S';
1861     buffer[3] = 'T';
1862     buffer[4] = '\0';
1863     if (strcmp(buffer, "TEST") != 0) {
1864       fail("Memory not writable");
1865     } else {
1866       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1867       if (p == NULL) {
1868         fail("Reallocation failed");
1869       } else {
1870         /* Write again, just to be sure */
1871         buffer = p;
1872         buffer[0] = 'V';
1873         if (strcmp(buffer, "VEST") != 0) {
1874           fail("Reallocated memory not writable");
1875         }
1876       }
1877     }
1878     XML_MemFree(g_parser, buffer);
1879   }
1880 }
1881 END_TEST
1882 
1883 /* Test XML_DefaultCurrent() passes handling on correctly */
1884 START_TEST(test_default_current) {
1885   const char *text = "<doc>hell]</doc>";
1886   const char *entity_text = "<!DOCTYPE doc [\n"
1887                             "<!ENTITY entity '&#37;'>\n"
1888                             "]>\n"
1889                             "<doc>&entity;</doc>";
1890 
1891   set_subtest("with defaulting");
1892   {
1893     struct handler_record_list storage;
1894     storage.count = 0;
1895     XML_SetDefaultHandler(g_parser, record_default_handler);
1896     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1897     XML_SetUserData(g_parser, &storage);
1898     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1899         == XML_STATUS_ERROR)
1900       xml_failure(g_parser);
1901     int i = 0;
1902     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1903     // we should have gotten one or more cdata callbacks, totaling 5 chars
1904     int cdata_len_remaining = 5;
1905     while (cdata_len_remaining > 0) {
1906       const struct handler_record_entry *c_entry
1907           = handler_record_get(&storage, i++);
1908       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1909       assert_true(c_entry->arg > 0);
1910       assert_true(c_entry->arg <= cdata_len_remaining);
1911       cdata_len_remaining -= c_entry->arg;
1912       // default handler must follow, with the exact same len argument.
1913       assert_record_handler_called(&storage, i++, "record_default_handler",
1914                                    c_entry->arg);
1915     }
1916     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1917     assert_true(storage.count == i);
1918   }
1919 
1920   /* Again, without the defaulting */
1921   set_subtest("no defaulting");
1922   {
1923     struct handler_record_list storage;
1924     storage.count = 0;
1925     XML_ParserReset(g_parser, NULL);
1926     XML_SetDefaultHandler(g_parser, record_default_handler);
1927     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1928     XML_SetUserData(g_parser, &storage);
1929     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1930         == XML_STATUS_ERROR)
1931       xml_failure(g_parser);
1932     int i = 0;
1933     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1934     // we should have gotten one or more cdata callbacks, totaling 5 chars
1935     int cdata_len_remaining = 5;
1936     while (cdata_len_remaining > 0) {
1937       const struct handler_record_entry *c_entry
1938           = handler_record_get(&storage, i++);
1939       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1940       assert_true(c_entry->arg > 0);
1941       assert_true(c_entry->arg <= cdata_len_remaining);
1942       cdata_len_remaining -= c_entry->arg;
1943     }
1944     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1945     assert_true(storage.count == i);
1946   }
1947 
1948   /* Now with an internal entity to complicate matters */
1949   set_subtest("with internal entity");
1950   {
1951     struct handler_record_list storage;
1952     storage.count = 0;
1953     XML_ParserReset(g_parser, NULL);
1954     XML_SetDefaultHandler(g_parser, record_default_handler);
1955     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1956     XML_SetUserData(g_parser, &storage);
1957     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1958                                 XML_TRUE)
1959         == XML_STATUS_ERROR)
1960       xml_failure(g_parser);
1961     /* The default handler suppresses the entity */
1962     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1963     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1964     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1965     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1966     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1967     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1968     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1969     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1970     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1971     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1972     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1973     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1974     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1975     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1976     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1977     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1978     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1979     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1980     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1981     assert_true(storage.count == 19);
1982   }
1983 
1984   /* Again, with a skip handler */
1985   set_subtest("with skip handler");
1986   {
1987     struct handler_record_list storage;
1988     storage.count = 0;
1989     XML_ParserReset(g_parser, NULL);
1990     XML_SetDefaultHandler(g_parser, record_default_handler);
1991     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1992     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1993     XML_SetUserData(g_parser, &storage);
1994     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1995                                 XML_TRUE)
1996         == XML_STATUS_ERROR)
1997       xml_failure(g_parser);
1998     /* The default handler suppresses the entity */
1999     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2000     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2001     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2002     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2003     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2004     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2005     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2006     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2007     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2008     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2009     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2010     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2011     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2012     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2013     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2014     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2015     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2016     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2017     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2018     assert_true(storage.count == 19);
2019   }
2020 
2021   /* This time, allow the entity through */
2022   set_subtest("allow entity");
2023   {
2024     struct handler_record_list storage;
2025     storage.count = 0;
2026     XML_ParserReset(g_parser, NULL);
2027     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2028     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2029     XML_SetUserData(g_parser, &storage);
2030     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2031                                 XML_TRUE)
2032         == XML_STATUS_ERROR)
2033       xml_failure(g_parser);
2034     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2035     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2036     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2037     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2038     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2039     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2040     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2041     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2042     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2043     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2044     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2045     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2046     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2047     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2048     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2049     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2050     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2051     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2052     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2053     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2054     assert_true(storage.count == 20);
2055   }
2056 
2057   /* Finally, without passing the cdata to the default handler */
2058   set_subtest("not passing cdata");
2059   {
2060     struct handler_record_list storage;
2061     storage.count = 0;
2062     XML_ParserReset(g_parser, NULL);
2063     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2064     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2065     XML_SetUserData(g_parser, &storage);
2066     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2067                                 XML_TRUE)
2068         == XML_STATUS_ERROR)
2069       xml_failure(g_parser);
2070     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2071     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2072     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2073     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2074     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2075     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2076     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2077     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2078     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2079     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2080     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2081     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2082     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2083     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2084     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2085     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2086     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2087     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2088                                  1);
2089     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2090     assert_true(storage.count == 19);
2091   }
2092 }
2093 END_TEST
2094 
2095 /* Test DTD element parsing code paths */
2096 START_TEST(test_dtd_elements) {
2097   const char *text = "<!DOCTYPE doc [\n"
2098                      "<!ELEMENT doc (chapter)>\n"
2099                      "<!ELEMENT chapter (#PCDATA)>\n"
2100                      "]>\n"
2101                      "<doc><chapter>Wombats are go</chapter></doc>";
2102 
2103   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2104   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2105       == XML_STATUS_ERROR)
2106     xml_failure(g_parser);
2107 }
2108 END_TEST
2109 
2110 static void XMLCALL
2111 element_decl_check_model(void *userData, const XML_Char *name,
2112                          XML_Content *model) {
2113   UNUSED_P(userData);
2114   uint32_t errorFlags = 0;
2115 
2116   /* Expected model array structure is this:
2117    * [0] (type 6, quant 0)
2118    *   [1] (type 5, quant 0)
2119    *     [3] (type 4, quant 0, name "bar")
2120    *     [4] (type 4, quant 0, name "foo")
2121    *     [5] (type 4, quant 3, name "xyz")
2122    *   [2] (type 4, quant 2, name "zebra")
2123    */
2124   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2125   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2126 
2127   if (model != NULL) {
2128     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2129     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2130     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2131     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2132     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2133 
2134     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2135     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2136     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2137     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2138     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2139 
2140     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2141     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2142     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2143     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2144     errorFlags
2145         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2146 
2147     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2148     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2149     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2150     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2151     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2152 
2153     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2154     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2155     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2156     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2157     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2158 
2159     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2160     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2161     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2162     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2163     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2164   }
2165 
2166   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2167   XML_FreeContentModel(g_parser, model);
2168 }
2169 
2170 START_TEST(test_dtd_elements_nesting) {
2171   // Payload inspired by a test in Perl's XML::Parser
2172   const char *text = "<!DOCTYPE foo [\n"
2173                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2174                      "]>\n"
2175                      "<foo/>";
2176 
2177   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2178 
2179   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2180   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2181       == XML_STATUS_ERROR)
2182     xml_failure(g_parser);
2183 
2184   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2185     fail("Element declaration model regression detected");
2186 }
2187 END_TEST
2188 
2189 /* Test foreign DTD handling */
2190 START_TEST(test_set_foreign_dtd) {
2191   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2192   const char *text2 = "<doc>&entity;</doc>";
2193   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2194 
2195   /* Check hash salt is passed through too */
2196   XML_SetHashSalt(g_parser, 0x12345678);
2197   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2198   XML_SetUserData(g_parser, &test_data);
2199   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2200   /* Add a default handler to exercise more code paths */
2201   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2202   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2203     fail("Could not set foreign DTD");
2204   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2205       == XML_STATUS_ERROR)
2206     xml_failure(g_parser);
2207 
2208   /* Ensure that trying to set the DTD after parsing has started
2209    * is faulted, even if it's the same setting.
2210    */
2211   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2212       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2213     fail("Failed to reject late foreign DTD setting");
2214   /* Ditto for the hash salt */
2215   if (XML_SetHashSalt(g_parser, 0x23456789))
2216     fail("Failed to reject late hash salt change");
2217 
2218   /* Now finish the parse */
2219   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2220       == XML_STATUS_ERROR)
2221     xml_failure(g_parser);
2222 }
2223 END_TEST
2224 
2225 /* Test foreign DTD handling with a failing NotStandalone handler */
2226 START_TEST(test_foreign_dtd_not_standalone) {
2227   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2228                      "<doc>&entity;</doc>";
2229   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2230 
2231   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2232   XML_SetUserData(g_parser, &test_data);
2233   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2234   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2235   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2236     fail("Could not set foreign DTD");
2237   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2238                  "NotStandalonehandler failed to reject");
2239 }
2240 END_TEST
2241 
2242 /* Test invalid character in a foreign DTD is faulted */
2243 START_TEST(test_invalid_foreign_dtd) {
2244   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2245                      "<doc>&entity;</doc>";
2246   ExtFaults test_data
2247       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2248 
2249   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2250   XML_SetUserData(g_parser, &test_data);
2251   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2252   XML_UseForeignDTD(g_parser, XML_TRUE);
2253   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2254                  "Bad DTD should not have been accepted");
2255 }
2256 END_TEST
2257 
2258 /* Test foreign DTD use with a doctype */
2259 START_TEST(test_foreign_dtd_with_doctype) {
2260   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2261                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2262   const char *text2 = "<doc>&entity;</doc>";
2263   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2264 
2265   /* Check hash salt is passed through too */
2266   XML_SetHashSalt(g_parser, 0x12345678);
2267   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2268   XML_SetUserData(g_parser, &test_data);
2269   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2270   /* Add a default handler to exercise more code paths */
2271   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2272   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2273     fail("Could not set foreign DTD");
2274   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2275       == XML_STATUS_ERROR)
2276     xml_failure(g_parser);
2277 
2278   /* Ensure that trying to set the DTD after parsing has started
2279    * is faulted, even if it's the same setting.
2280    */
2281   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2282       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2283     fail("Failed to reject late foreign DTD setting");
2284   /* Ditto for the hash salt */
2285   if (XML_SetHashSalt(g_parser, 0x23456789))
2286     fail("Failed to reject late hash salt change");
2287 
2288   /* Now finish the parse */
2289   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2290       == XML_STATUS_ERROR)
2291     xml_failure(g_parser);
2292 }
2293 END_TEST
2294 
2295 /* Test XML_UseForeignDTD with no external subset present */
2296 START_TEST(test_foreign_dtd_without_external_subset) {
2297   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2298                      "<doc>&foo;</doc>";
2299 
2300   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2301   XML_SetUserData(g_parser, NULL);
2302   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2303   XML_UseForeignDTD(g_parser, XML_TRUE);
2304   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2305       == XML_STATUS_ERROR)
2306     xml_failure(g_parser);
2307 }
2308 END_TEST
2309 
2310 START_TEST(test_empty_foreign_dtd) {
2311   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2312                      "<doc>&entity;</doc>";
2313 
2314   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2315   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2316   XML_UseForeignDTD(g_parser, XML_TRUE);
2317   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2318                  "Undefined entity not faulted");
2319 }
2320 END_TEST
2321 
2322 /* Test XML Base is set and unset appropriately */
2323 START_TEST(test_set_base) {
2324   const XML_Char *old_base;
2325   const XML_Char *new_base = XCS("/local/file/name.xml");
2326 
2327   old_base = XML_GetBase(g_parser);
2328   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2329     fail("Unable to set base");
2330   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2331     fail("Base setting not correct");
2332   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2333     fail("Unable to NULL base");
2334   if (XML_GetBase(g_parser) != NULL)
2335     fail("Base setting not nulled");
2336   XML_SetBase(g_parser, old_base);
2337 }
2338 END_TEST
2339 
2340 /* Test attribute counts, indexing, etc */
2341 START_TEST(test_attributes) {
2342   const char *text = "<!DOCTYPE doc [\n"
2343                      "<!ELEMENT doc (tag)>\n"
2344                      "<!ATTLIST doc id ID #REQUIRED>\n"
2345                      "]>"
2346                      "<doc a='1' id='one' b='2'>"
2347                      "<tag c='3'/>"
2348                      "</doc>";
2349   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2350                          {XCS("b"), XCS("2")},
2351                          {XCS("id"), XCS("one")},
2352                          {NULL, NULL}};
2353   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2354   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2355                         {XCS("tag"), 1, NULL, NULL},
2356                         {NULL, 0, NULL, NULL}};
2357   info[0].attributes = doc_info;
2358   info[1].attributes = tag_info;
2359 
2360   XML_Parser parser = XML_ParserCreate(NULL);
2361   assert_true(parser != NULL);
2362   ParserAndElementInfo parserAndElementInfos = {
2363       parser,
2364       info,
2365   };
2366 
2367   XML_SetStartElementHandler(parser, counting_start_element_handler);
2368   XML_SetUserData(parser, &parserAndElementInfos);
2369   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2370       == XML_STATUS_ERROR)
2371     xml_failure(parser);
2372 
2373   XML_ParserFree(parser);
2374 }
2375 END_TEST
2376 
2377 /* Test reset works correctly in the middle of processing an internal
2378  * entity.  Exercises some obscure code in XML_ParserReset().
2379  */
2380 START_TEST(test_reset_in_entity) {
2381   const char *text = "<!DOCTYPE doc [\n"
2382                      "<!ENTITY wombat 'wom'>\n"
2383                      "<!ENTITY entity 'hi &wom; there'>\n"
2384                      "]>\n"
2385                      "<doc>&entity;</doc>";
2386   XML_ParsingStatus status;
2387 
2388   g_resumable = XML_TRUE;
2389   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2390   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2391       == XML_STATUS_ERROR)
2392     xml_failure(g_parser);
2393   XML_GetParsingStatus(g_parser, &status);
2394   if (status.parsing != XML_SUSPENDED)
2395     fail("Parsing status not SUSPENDED");
2396   XML_ParserReset(g_parser, NULL);
2397   XML_GetParsingStatus(g_parser, &status);
2398   if (status.parsing != XML_INITIALIZED)
2399     fail("Parsing status doesn't reset to INITIALIZED");
2400 }
2401 END_TEST
2402 
2403 /* Test that resume correctly passes through parse errors */
2404 START_TEST(test_resume_invalid_parse) {
2405   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2406 
2407   g_resumable = XML_TRUE;
2408   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2409   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2410       == XML_STATUS_ERROR)
2411     xml_failure(g_parser);
2412   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2413     fail("Resumed invalid parse not faulted");
2414   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2415     fail("Invalid parse not correctly faulted");
2416 }
2417 END_TEST
2418 
2419 /* Test that re-suspended parses are correctly passed through */
2420 START_TEST(test_resume_resuspended) {
2421   const char *text = "<doc>Hello<meep/>world</doc>";
2422 
2423   g_resumable = XML_TRUE;
2424   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2425   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2426       == XML_STATUS_ERROR)
2427     xml_failure(g_parser);
2428   g_resumable = XML_TRUE;
2429   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2430   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2431     fail("Resumption not suspended");
2432   /* This one should succeed and finish up */
2433   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2434     xml_failure(g_parser);
2435 }
2436 END_TEST
2437 
2438 /* Test that CDATA shows up correctly through a default handler */
2439 START_TEST(test_cdata_default) {
2440   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2441   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2442   CharData storage;
2443 
2444   CharData_Init(&storage);
2445   XML_SetUserData(g_parser, &storage);
2446   XML_SetDefaultHandler(g_parser, accumulate_characters);
2447 
2448   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2449       == XML_STATUS_ERROR)
2450     xml_failure(g_parser);
2451   CharData_CheckXMLChars(&storage, expected);
2452 }
2453 END_TEST
2454 
2455 /* Test resetting a subordinate parser does exactly nothing */
2456 START_TEST(test_subordinate_reset) {
2457   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2458                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2459                      "<doc>&entity;</doc>";
2460 
2461   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2462   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2463   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2464       == XML_STATUS_ERROR)
2465     xml_failure(g_parser);
2466 }
2467 END_TEST
2468 
2469 /* Test suspending a subordinate parser */
2470 START_TEST(test_subordinate_suspend) {
2471   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2472                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2473                      "<doc>&entity;</doc>";
2474 
2475   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2476   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2477   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2478       == XML_STATUS_ERROR)
2479     xml_failure(g_parser);
2480 }
2481 END_TEST
2482 
2483 /* Test suspending a subordinate parser from an XML declaration */
2484 /* Increases code coverage of the tests */
2485 
2486 START_TEST(test_subordinate_xdecl_suspend) {
2487   const char *text
2488       = "<!DOCTYPE doc [\n"
2489         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2490         "]>\n"
2491         "<doc>&entity;</doc>";
2492 
2493   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2494   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2495   g_resumable = XML_TRUE;
2496   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2497       == XML_STATUS_ERROR)
2498     xml_failure(g_parser);
2499 }
2500 END_TEST
2501 
2502 START_TEST(test_subordinate_xdecl_abort) {
2503   const char *text
2504       = "<!DOCTYPE doc [\n"
2505         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2506         "]>\n"
2507         "<doc>&entity;</doc>";
2508 
2509   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2510   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2511   g_resumable = XML_FALSE;
2512   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2513       == XML_STATUS_ERROR)
2514     xml_failure(g_parser);
2515 }
2516 END_TEST
2517 
2518 /* Test external entity fault handling with suspension */
2519 START_TEST(test_ext_entity_invalid_suspended_parse) {
2520   const char *text = "<!DOCTYPE doc [\n"
2521                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2522                      "]>\n"
2523                      "<doc>&en;</doc>";
2524   ExtFaults faults[]
2525       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2526           "Incomplete element declaration not faulted", NULL,
2527           XML_ERROR_UNCLOSED_TOKEN},
2528          {/* First two bytes of a three-byte char */
2529           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2530           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2531          {NULL, NULL, NULL, XML_ERROR_NONE}};
2532   ExtFaults *fault;
2533 
2534   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2535     set_subtest("%s", fault->parse_text);
2536     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2537     XML_SetExternalEntityRefHandler(g_parser,
2538                                     external_entity_suspending_faulter);
2539     XML_SetUserData(g_parser, fault);
2540     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2541                    "Parser did not report external entity error");
2542     XML_ParserReset(g_parser, NULL);
2543   }
2544 }
2545 END_TEST
2546 
2547 /* Test setting an explicit encoding */
2548 START_TEST(test_explicit_encoding) {
2549   const char *text1 = "<doc>Hello ";
2550   const char *text2 = " World</doc>";
2551 
2552   /* Just check that we can set the encoding to NULL before starting */
2553   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2554     fail("Failed to initialise encoding to NULL");
2555   /* Say we are UTF-8 */
2556   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2557     fail("Failed to set explicit encoding");
2558   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2559       == XML_STATUS_ERROR)
2560     xml_failure(g_parser);
2561   /* Try to switch encodings mid-parse */
2562   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2563     fail("Allowed encoding change");
2564   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2565       == XML_STATUS_ERROR)
2566     xml_failure(g_parser);
2567   /* Try now the parse is over */
2568   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2569     fail("Failed to unset encoding");
2570 }
2571 END_TEST
2572 
2573 /* Test handling of trailing CR (rather than newline) */
2574 START_TEST(test_trailing_cr) {
2575   const char *text = "<doc>\r";
2576   int found_cr;
2577 
2578   /* Try with a character handler, for code coverage */
2579   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2580   XML_SetUserData(g_parser, &found_cr);
2581   found_cr = 0;
2582   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2583       == XML_STATUS_OK)
2584     fail("Failed to fault unclosed doc");
2585   if (found_cr == 0)
2586     fail("Did not catch the carriage return");
2587   XML_ParserReset(g_parser, NULL);
2588 
2589   /* Now with a default handler instead */
2590   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2591   XML_SetUserData(g_parser, &found_cr);
2592   found_cr = 0;
2593   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2594       == XML_STATUS_OK)
2595     fail("Failed to fault unclosed doc");
2596   if (found_cr == 0)
2597     fail("Did not catch default carriage return");
2598 }
2599 END_TEST
2600 
2601 /* Test trailing CR in an external entity parse */
2602 START_TEST(test_ext_entity_trailing_cr) {
2603   const char *text = "<!DOCTYPE doc [\n"
2604                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2605                      "]>\n"
2606                      "<doc>&en;</doc>";
2607   int found_cr;
2608 
2609   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2610   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2611   XML_SetUserData(g_parser, &found_cr);
2612   found_cr = 0;
2613   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2614       != XML_STATUS_OK)
2615     xml_failure(g_parser);
2616   if (found_cr == 0)
2617     fail("No carriage return found");
2618   XML_ParserReset(g_parser, NULL);
2619 
2620   /* Try again with a different trailing CR */
2621   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2622   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2623   XML_SetUserData(g_parser, &found_cr);
2624   found_cr = 0;
2625   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2626       != XML_STATUS_OK)
2627     xml_failure(g_parser);
2628   if (found_cr == 0)
2629     fail("No carriage return found");
2630 }
2631 END_TEST
2632 
2633 /* Test handling of trailing square bracket */
2634 START_TEST(test_trailing_rsqb) {
2635   const char *text8 = "<doc>]";
2636   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2637   int found_rsqb;
2638   int text8_len = (int)strlen(text8);
2639 
2640   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2641   XML_SetUserData(g_parser, &found_rsqb);
2642   found_rsqb = 0;
2643   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2644       == XML_STATUS_OK)
2645     fail("Failed to fault unclosed doc");
2646   if (found_rsqb == 0)
2647     fail("Did not catch the right square bracket");
2648 
2649   /* Try again with a different encoding */
2650   XML_ParserReset(g_parser, NULL);
2651   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2652   XML_SetUserData(g_parser, &found_rsqb);
2653   found_rsqb = 0;
2654   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2655                               XML_TRUE)
2656       == XML_STATUS_OK)
2657     fail("Failed to fault unclosed doc");
2658   if (found_rsqb == 0)
2659     fail("Did not catch the right square bracket");
2660 
2661   /* And finally with a default handler */
2662   XML_ParserReset(g_parser, NULL);
2663   XML_SetDefaultHandler(g_parser, rsqb_handler);
2664   XML_SetUserData(g_parser, &found_rsqb);
2665   found_rsqb = 0;
2666   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2667                               XML_TRUE)
2668       == XML_STATUS_OK)
2669     fail("Failed to fault unclosed doc");
2670   if (found_rsqb == 0)
2671     fail("Did not catch the right square bracket");
2672 }
2673 END_TEST
2674 
2675 /* Test trailing right square bracket in an external entity parse */
2676 START_TEST(test_ext_entity_trailing_rsqb) {
2677   const char *text = "<!DOCTYPE doc [\n"
2678                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2679                      "]>\n"
2680                      "<doc>&en;</doc>";
2681   int found_rsqb;
2682 
2683   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2684   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2685   XML_SetUserData(g_parser, &found_rsqb);
2686   found_rsqb = 0;
2687   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2688       != XML_STATUS_OK)
2689     xml_failure(g_parser);
2690   if (found_rsqb == 0)
2691     fail("No right square bracket found");
2692 }
2693 END_TEST
2694 
2695 /* Test CDATA handling in an external entity */
2696 START_TEST(test_ext_entity_good_cdata) {
2697   const char *text = "<!DOCTYPE doc [\n"
2698                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2699                      "]>\n"
2700                      "<doc>&en;</doc>";
2701 
2702   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2703   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2704   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2705       != XML_STATUS_OK)
2706     xml_failure(g_parser);
2707 }
2708 END_TEST
2709 
2710 /* Test user parameter settings */
2711 START_TEST(test_user_parameters) {
2712   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2713                      "<!-- Primary parse -->\n"
2714                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2715                      "<doc>&entity;";
2716   const char *epilog = "<!-- Back to primary parser -->\n"
2717                        "</doc>";
2718 
2719   g_comment_count = 0;
2720   g_skip_count = 0;
2721   g_xdecl_count = 0;
2722   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2723   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2724   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2725   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2726   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2727   XML_UseParserAsHandlerArg(g_parser);
2728   XML_SetUserData(g_parser, (void *)1);
2729   g_handler_data = g_parser;
2730   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2731       == XML_STATUS_ERROR)
2732     xml_failure(g_parser);
2733   /* Ensure we can't change policy mid-parse */
2734   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2735     fail("Changed param entity parsing policy while parsing");
2736   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2737       == XML_STATUS_ERROR)
2738     xml_failure(g_parser);
2739   if (g_comment_count != 3)
2740     fail("Comment handler not invoked enough times");
2741   if (g_skip_count != 1)
2742     fail("Skip handler not invoked enough times");
2743   if (g_xdecl_count != 1)
2744     fail("XML declaration handler not invoked");
2745 }
2746 END_TEST
2747 
2748 /* Test that an explicit external entity handler argument replaces
2749  * the parser as the first argument.
2750  *
2751  * We do not call the first parameter to the external entity handler
2752  * 'parser' for once, since the first time the handler is called it
2753  * will actually be a text string.  We need to be able to access the
2754  * global 'parser' variable to create our external entity parser from,
2755  * since there are code paths we need to ensure get executed.
2756  */
2757 START_TEST(test_ext_entity_ref_parameter) {
2758   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2759                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2760                      "<doc>&entity;</doc>";
2761 
2762   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2763   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2764   /* Set a handler arg that is not NULL and not parser (which is
2765    * what NULL would cause to be passed.
2766    */
2767   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2768   g_handler_data = text;
2769   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2770       == XML_STATUS_ERROR)
2771     xml_failure(g_parser);
2772 
2773   /* Now try again with unset args */
2774   XML_ParserReset(g_parser, NULL);
2775   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2776   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2777   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2778   g_handler_data = g_parser;
2779   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2780       == XML_STATUS_ERROR)
2781     xml_failure(g_parser);
2782 }
2783 END_TEST
2784 
2785 /* Test the parsing of an empty string */
2786 START_TEST(test_empty_parse) {
2787   const char *text = "<doc></doc>";
2788   const char *partial = "<doc>";
2789 
2790   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2791     fail("Parsing empty string faulted");
2792   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2793     fail("Parsing final empty string not faulted");
2794   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2795     fail("Parsing final empty string faulted for wrong reason");
2796 
2797   /* Now try with valid text before the empty end */
2798   XML_ParserReset(g_parser, NULL);
2799   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2800       == XML_STATUS_ERROR)
2801     xml_failure(g_parser);
2802   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2803     fail("Parsing final empty string faulted");
2804 
2805   /* Now try with invalid text before the empty end */
2806   XML_ParserReset(g_parser, NULL);
2807   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2808                               XML_FALSE)
2809       == XML_STATUS_ERROR)
2810     xml_failure(g_parser);
2811   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2812     fail("Parsing final incomplete empty string not faulted");
2813 }
2814 END_TEST
2815 
2816 /* Test XML_Parse for len < 0 */
2817 START_TEST(test_negative_len_parse) {
2818   const char *const doc = "<root/>";
2819   for (int isFinal = 0; isFinal < 2; isFinal++) {
2820     set_subtest("isFinal=%d", isFinal);
2821 
2822     XML_Parser parser = XML_ParserCreate(NULL);
2823 
2824     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2825       fail("There was not supposed to be any initial parse error.");
2826 
2827     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2828 
2829     if (status != XML_STATUS_ERROR)
2830       fail("Negative len was expected to fail the parse but did not.");
2831 
2832     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2833       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2834 
2835     XML_ParserFree(parser);
2836   }
2837 }
2838 END_TEST
2839 
2840 /* Test XML_ParseBuffer for len < 0 */
2841 START_TEST(test_negative_len_parse_buffer) {
2842   const char *const doc = "<root/>";
2843   for (int isFinal = 0; isFinal < 2; isFinal++) {
2844     set_subtest("isFinal=%d", isFinal);
2845 
2846     XML_Parser parser = XML_ParserCreate(NULL);
2847 
2848     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2849       fail("There was not supposed to be any initial parse error.");
2850 
2851     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2852 
2853     if (buffer == NULL)
2854       fail("XML_GetBuffer failed.");
2855 
2856     memcpy(buffer, doc, strlen(doc));
2857 
2858     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2859 
2860     if (status != XML_STATUS_ERROR)
2861       fail("Negative len was expected to fail the parse but did not.");
2862 
2863     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2864       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2865 
2866     XML_ParserFree(parser);
2867   }
2868 }
2869 END_TEST
2870 
2871 /* Test odd corners of the XML_GetBuffer interface */
2872 static enum XML_Status
2873 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2874   const XML_Feature *feature = XML_GetFeatureList();
2875 
2876   if (feature == NULL)
2877     return XML_STATUS_ERROR;
2878   for (; feature->feature != XML_FEATURE_END; feature++) {
2879     if (feature->feature == feature_id) {
2880       *presult = feature->value;
2881       return XML_STATUS_OK;
2882     }
2883   }
2884   return XML_STATUS_ERROR;
2885 }
2886 
2887 /* Test odd corners of the XML_GetBuffer interface */
2888 START_TEST(test_get_buffer_1) {
2889   const char *text = get_buffer_test_text;
2890   void *buffer;
2891   long context_bytes;
2892 
2893   /* Attempt to allocate a negative length buffer */
2894   if (XML_GetBuffer(g_parser, -12) != NULL)
2895     fail("Negative length buffer not failed");
2896 
2897   /* Now get a small buffer and extend it past valid length */
2898   buffer = XML_GetBuffer(g_parser, 1536);
2899   if (buffer == NULL)
2900     fail("1.5K buffer failed");
2901   assert(buffer != NULL);
2902   memcpy(buffer, text, strlen(text));
2903   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2904       == XML_STATUS_ERROR)
2905     xml_failure(g_parser);
2906   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2907     fail("INT_MAX buffer not failed");
2908 
2909   /* Now try extending it a more reasonable but still too large
2910    * amount.  The allocator in XML_GetBuffer() doubles the buffer
2911    * size until it exceeds the requested amount or INT_MAX.  If it
2912    * exceeds INT_MAX, it rejects the request, so we want a request
2913    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2914    * with an extra byte just to ensure that the request is off any
2915    * boundary.  The request will be inflated internally by
2916    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2917    * request.
2918    */
2919   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2920     context_bytes = 0;
2921   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2922     fail("INT_MAX- buffer not failed");
2923 
2924   /* Now try extending it a carefully crafted amount */
2925   if (XML_GetBuffer(g_parser, 1000) == NULL)
2926     fail("1000 buffer failed");
2927 }
2928 END_TEST
2929 
2930 /* Test more corners of the XML_GetBuffer interface */
2931 START_TEST(test_get_buffer_2) {
2932   const char *text = get_buffer_test_text;
2933   void *buffer;
2934 
2935   /* Now get a decent buffer */
2936   buffer = XML_GetBuffer(g_parser, 1536);
2937   if (buffer == NULL)
2938     fail("1.5K buffer failed");
2939   assert(buffer != NULL);
2940   memcpy(buffer, text, strlen(text));
2941   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2942       == XML_STATUS_ERROR)
2943     xml_failure(g_parser);
2944 
2945   /* Extend it, to catch a different code path */
2946   if (XML_GetBuffer(g_parser, 1024) == NULL)
2947     fail("1024 buffer failed");
2948 }
2949 END_TEST
2950 
/* Regression test for CVE-2022-23852 (signed integer overflow) */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const newline = "\n";
  const int expectedKeepValue = (int)strlen(newline);
  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  // Feeding this input leaves variable "keep" in XML_GetBuffer at
  // expectedKeepValue for the request made below
  const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
      parser, newline, expectedKeepValue, XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR) {
    xml_failure(parser);
  }

  assert(expectedKeepValue > 0);
  const int oversized_request = INT_MAX - expectedKeepValue + 1;
  void *const buffer = XML_GetBuffer(parser, oversized_request);
  if (buffer != NULL) {
    fail("enlarging buffer not failed");
  }

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2975 
2976 START_TEST(test_buffer_can_grow_to_max) {
2977   const char *const prefixes[] = {
2978       "",
2979       "<",
2980       "<x a='",
2981       "<doc><x a='",
2982       "<document><x a='",
2983       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2984       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2985       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2986       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2987       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2988   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2989   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2990 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2991   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2992   // Can we make a big allocation?
2993   void *big = malloc(maxbuf);
2994   if (! big) {
2995     // The big allocation failed. Let's be a little lenient.
2996     maxbuf = maxbuf / 2;
2997   }
2998   free(big);
2999 #endif
3000 
3001   for (int i = 0; i < num_prefixes; ++i) {
3002     set_subtest("\"%s\"", prefixes[i]);
3003     XML_Parser parser = XML_ParserCreate(NULL);
3004     const int prefix_len = (int)strlen(prefixes[i]);
3005     const enum XML_Status s
3006         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3007     if (s != XML_STATUS_OK)
3008       xml_failure(parser);
3009 
3010     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3011     // subtracting the whole prefix is easiest, and close enough.
3012     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3013     // The limit should be consistent; no prefix should allow us to
3014     // reach above the max buffer size.
3015     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3016     XML_ParserFree(parser);
3017   }
3018 }
3019 END_TEST
3020 
3021 START_TEST(test_getbuffer_allocates_on_zero_len) {
3022   for (int first_len = 1; first_len >= 0; first_len--) {
3023     set_subtest("with len=%d first", first_len);
3024     XML_Parser parser = XML_ParserCreate(NULL);
3025     assert_true(parser != NULL);
3026     assert_true(XML_GetBuffer(parser, first_len) != NULL);
3027     assert_true(XML_GetBuffer(parser, 0) != NULL);
3028     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3029       xml_failure(parser);
3030     XML_ParserFree(parser);
3031   }
3032 }
3033 END_TEST
3034 
3035 /* Test position information macros */
3036 START_TEST(test_byte_info_at_end) {
3037   const char *text = "<doc></doc>";
3038 
3039   if (XML_GetCurrentByteIndex(g_parser) != -1
3040       || XML_GetCurrentByteCount(g_parser) != 0)
3041     fail("Byte index/count incorrect at start of parse");
3042   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3043       == XML_STATUS_ERROR)
3044     xml_failure(g_parser);
3045   /* At end, the count will be zero and the index the end of string */
3046   if (XML_GetCurrentByteCount(g_parser) != 0)
3047     fail("Terminal byte count incorrect");
3048   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3049     fail("Terminal byte index incorrect");
3050 }
3051 END_TEST
3052 
3053 /* Test position information from errors */
3054 #define PRE_ERROR_STR "<doc></"
3055 #define POST_ERROR_STR "wombat></doc>"
3056 START_TEST(test_byte_info_at_error) {
3057   const char *text = PRE_ERROR_STR POST_ERROR_STR;
3058 
3059   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3060       == XML_STATUS_OK)
3061     fail("Syntax error not faulted");
3062   if (XML_GetCurrentByteCount(g_parser) != 0)
3063     fail("Error byte count incorrect");
3064   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3065     fail("Error byte index incorrect");
3066 }
3067 END_TEST
3068 #undef PRE_ERROR_STR
3069 #undef POST_ERROR_STR
3070 
3071 /* Test position information in handler */
3072 #define START_ELEMENT "<e>"
3073 #define CDATA_TEXT "Hello"
3074 #define END_ELEMENT "</e>"
3075 START_TEST(test_byte_info_at_cdata) {
3076   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3077   int offset, size;
3078   ByteTestData data;
3079 
3080   /* Check initial context is empty */
3081   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3082     fail("Unexpected context at start of parse");
3083 
3084   data.start_element_len = (int)strlen(START_ELEMENT);
3085   data.cdata_len = (int)strlen(CDATA_TEXT);
3086   data.total_string_len = (int)strlen(text);
3087   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3088   XML_SetUserData(g_parser, &data);
3089   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3090     xml_failure(g_parser);
3091 }
3092 END_TEST
3093 #undef START_ELEMENT
3094 #undef CDATA_TEXT
3095 #undef END_ELEMENT
3096 
3097 /* Test predefined entities are correctly recognised */
3098 START_TEST(test_predefined_entities) {
3099   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3100   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3101   const XML_Char *result = XCS("<>&\"'");
3102   CharData storage;
3103 
3104   XML_SetDefaultHandler(g_parser, accumulate_characters);
3105   /* run_character_check uses XML_SetCharacterDataHandler(), which
3106    * unfortunately heads off a code path that we need to exercise.
3107    */
3108   CharData_Init(&storage);
3109   XML_SetUserData(g_parser, &storage);
3110   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3111       == XML_STATUS_ERROR)
3112     xml_failure(g_parser);
3113   /* The default handler doesn't translate the entities */
3114   CharData_CheckXMLChars(&storage, expected);
3115 
3116   /* Now try again and check the translation */
3117   XML_ParserReset(g_parser, NULL);
3118   run_character_check(text, result);
3119 }
3120 END_TEST
3121 
3122 /* Regression test that an invalid tag in an external parameter
3123  * reference in an external DTD is correctly faulted.
3124  *
3125  * Only a few specific tags are legal in DTDs ignoring comments and
3126  * processing instructions, all of which begin with an exclamation
3127  * mark.  "<el/>" is not one of them, so the parser should raise an
3128  * error on encountering it.
3129  */
3130 START_TEST(test_invalid_tag_in_dtd) {
3131   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3132                      "<doc></doc>\n";
3133 
3134   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3135   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3136   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3137                  "Invalid tag IN DTD external param not rejected");
3138 }
3139 END_TEST
3140 
3141 /* Test entities not quite the predefined ones are not mis-recognised */
3142 START_TEST(test_not_predefined_entities) {
3143   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3144                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3145   int i = 0;
3146 
3147   while (text[i] != NULL) {
3148     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3149                    "Undefined entity not rejected");
3150     XML_ParserReset(g_parser, NULL);
3151     i++;
3152   }
3153 }
3154 END_TEST
3155 
3156 /* Test conditional inclusion (IGNORE) */
3157 START_TEST(test_ignore_section) {
3158   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3159                      "<doc><e>&entity;</e></doc>";
3160   const XML_Char *expected
3161       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3162   CharData storage;
3163 
3164   CharData_Init(&storage);
3165   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3166   XML_SetUserData(g_parser, &storage);
3167   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3168   XML_SetDefaultHandler(g_parser, accumulate_characters);
3169   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3170   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3171   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3172   XML_SetStartElementHandler(g_parser, dummy_start_element);
3173   XML_SetEndElementHandler(g_parser, dummy_end_element);
3174   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3175       == XML_STATUS_ERROR)
3176     xml_failure(g_parser);
3177   CharData_CheckXMLChars(&storage, expected);
3178 }
3179 END_TEST
3180 
3181 START_TEST(test_ignore_section_utf16) {
3182   const char text[] =
3183       /* <!DOCTYPE d SYSTEM 's'> */
3184       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3185       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3186       /* <d><e>&en;</e></d> */
3187       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3188   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3189   CharData storage;
3190 
3191   CharData_Init(&storage);
3192   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3193   XML_SetUserData(g_parser, &storage);
3194   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3195   XML_SetDefaultHandler(g_parser, accumulate_characters);
3196   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3197   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3198   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3199   XML_SetStartElementHandler(g_parser, dummy_start_element);
3200   XML_SetEndElementHandler(g_parser, dummy_end_element);
3201   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3202       == XML_STATUS_ERROR)
3203     xml_failure(g_parser);
3204   CharData_CheckXMLChars(&storage, expected);
3205 }
3206 END_TEST
3207 
3208 START_TEST(test_ignore_section_utf16_be) {
3209   const char text[] =
3210       /* <!DOCTYPE d SYSTEM 's'> */
3211       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3212       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3213       /* <d><e>&en;</e></d> */
3214       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3215   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3216   CharData storage;
3217 
3218   CharData_Init(&storage);
3219   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3220   XML_SetUserData(g_parser, &storage);
3221   XML_SetExternalEntityRefHandler(g_parser,
3222                                   external_entity_load_ignore_utf16_be);
3223   XML_SetDefaultHandler(g_parser, accumulate_characters);
3224   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3225   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3226   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3227   XML_SetStartElementHandler(g_parser, dummy_start_element);
3228   XML_SetEndElementHandler(g_parser, dummy_end_element);
3229   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3230       == XML_STATUS_ERROR)
3231     xml_failure(g_parser);
3232   CharData_CheckXMLChars(&storage, expected);
3233 }
3234 END_TEST
3235 
3236 /* Test mis-formatted conditional exclusion */
3237 START_TEST(test_bad_ignore_section) {
3238   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3239                      "<doc><e>&entity;</e></doc>";
3240   ExtFaults faults[]
3241       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3242           XML_ERROR_SYNTAX},
3243          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3244           XML_ERROR_INVALID_TOKEN},
3245          {/* FIrst two bytes of a three-byte char */
3246           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3247           XML_ERROR_PARTIAL_CHAR},
3248          {NULL, NULL, NULL, XML_ERROR_NONE}};
3249   ExtFaults *fault;
3250 
3251   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3252     set_subtest("%s", fault->parse_text);
3253     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3254     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3255     XML_SetUserData(g_parser, fault);
3256     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3257                    "Incomplete IGNORE section not failed");
3258     XML_ParserReset(g_parser, NULL);
3259   }
3260 }
3261 END_TEST
3262 
/* State shared between test_external_bom_consumed and its external
 * entity handler external_bom_checker, passed via the parser's user data.
 */
struct bom_testdata {
  /* Text the handler parses when asked for '004-2.ent' */
  const char *external;
  /* Number of leading bytes of `external` fed in a first, non-final
   * parse call; the remainder is parsed as the final chunk */
  int split;
  /* Set to XML_TRUE by the handler once it has been asked for '004-2.ent' */
  XML_Bool nested_callback_happened;
};
3268 
3269 static int XMLCALL
3270 external_bom_checker(XML_Parser parser, const XML_Char *context,
3271                      const XML_Char *base, const XML_Char *systemId,
3272                      const XML_Char *publicId) {
3273   const char *text;
3274   UNUSED_P(base);
3275   UNUSED_P(systemId);
3276   UNUSED_P(publicId);
3277 
3278   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3279   if (ext_parser == NULL)
3280     fail("Could not create external entity parser");
3281 
3282   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3283     struct bom_testdata *const testdata
3284         = (struct bom_testdata *)XML_GetUserData(parser);
3285     const char *const external = testdata->external;
3286     const int split = testdata->split;
3287     testdata->nested_callback_happened = XML_TRUE;
3288 
3289     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3290         != XML_STATUS_OK) {
3291       xml_failure(ext_parser);
3292     }
3293     text = external + split; // the parse below will continue where we left off.
3294   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3295     text = "<!ELEMENT doc EMPTY>\n"
3296            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3297            "<!ENTITY % e2 '%e1;'>\n";
3298   } else {
3299     fail("unknown systemId");
3300   }
3301 
3302   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3303       != XML_STATUS_OK)
3304     xml_failure(ext_parser);
3305 
3306   XML_ParserFree(ext_parser);
3307   return XML_STATUS_OK;
3308 }
3309 
3310 /* regression test: BOM should be consumed when followed by a partial token. */
3311 START_TEST(test_external_bom_consumed) {
3312   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3313                            "<doc></doc>\n";
3314   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3315   const int len = (int)strlen(external);
3316   for (int split = 0; split <= len; ++split) {
3317     set_subtest("split at byte %d", split);
3318 
3319     struct bom_testdata testdata;
3320     testdata.external = external;
3321     testdata.split = split;
3322     testdata.nested_callback_happened = XML_FALSE;
3323 
3324     XML_Parser parser = XML_ParserCreate(NULL);
3325     if (parser == NULL) {
3326       fail("Couldn't create parser");
3327     }
3328     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3329     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3330     XML_SetUserData(parser, &testdata);
3331     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3332         == XML_STATUS_ERROR)
3333       xml_failure(parser);
3334     if (! testdata.nested_callback_happened) {
3335       fail("ref handler not called");
3336     }
3337     XML_ParserFree(parser);
3338   }
3339 }
3340 END_TEST
3341 
3342 /* Test recursive parsing */
3343 START_TEST(test_external_entity_values) {
3344   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3345                      "<doc></doc>\n";
3346   ExtFaults data_004_2[] = {
3347       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3348       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3349        XML_ERROR_INVALID_TOKEN},
3350       {"'wombat", "Unterminated string not faulted", NULL,
3351        XML_ERROR_UNCLOSED_TOKEN},
3352       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3353        XML_ERROR_PARTIAL_CHAR},
3354       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3355       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3356        XML_ERROR_XML_DECL},
3357       {/* UTF-8 BOM */
3358        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3359        XML_ERROR_NONE},
3360       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3361        "Invalid token after text declaration not faulted", NULL,
3362        XML_ERROR_INVALID_TOKEN},
3363       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3364        "Unterminated string after text decl not faulted", NULL,
3365        XML_ERROR_UNCLOSED_TOKEN},
3366       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3367        "Partial UTF-8 character after text decl not faulted", NULL,
3368        XML_ERROR_PARTIAL_CHAR},
3369       {"%e1;", "Recursive parameter entity not faulted", NULL,
3370        XML_ERROR_RECURSIVE_ENTITY_REF},
3371       {NULL, NULL, NULL, XML_ERROR_NONE}};
3372   int i;
3373 
3374   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3375     set_subtest("%s", data_004_2[i].parse_text);
3376     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3377     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3378     XML_SetUserData(g_parser, &data_004_2[i]);
3379     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3380         == XML_STATUS_ERROR)
3381       xml_failure(g_parser);
3382     XML_ParserReset(g_parser, NULL);
3383   }
3384 }
3385 END_TEST
3386 
3387 /* Test the recursive parse interacts with a not standalone handler */
3388 START_TEST(test_ext_entity_not_standalone) {
3389   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3390                      "<doc></doc>";
3391 
3392   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3393   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3394   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3395                  "Standalone rejection not caught");
3396 }
3397 END_TEST
3398 
3399 START_TEST(test_ext_entity_value_abort) {
3400   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3401                      "<doc></doc>\n";
3402 
3403   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3404   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3405   g_resumable = XML_FALSE;
3406   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407       == XML_STATUS_ERROR)
3408     xml_failure(g_parser);
3409 }
3410 END_TEST
3411 
3412 START_TEST(test_bad_public_doctype) {
3413   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3414                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3415                      "<doc></doc>";
3416 
3417   /* Setting a handler provokes a particular code path */
3418   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3419                             dummy_end_doctype_handler);
3420   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3421 }
3422 END_TEST
3423 
3424 /* Test based on ibm/valid/P32/ibm32v04.xml */
3425 START_TEST(test_attribute_enum_value) {
3426   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3427                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3428                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3429   ExtTest dtd_data
3430       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3431          "<!ELEMENT a EMPTY>\n"
3432          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3433          NULL, NULL};
3434   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3435 
3436   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3437   XML_SetUserData(g_parser, &dtd_data);
3438   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3439   /* An attribute list handler provokes a different code path */
3440   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3441   run_ext_character_check(text, &dtd_data, expected);
3442 }
3443 END_TEST
3444 
3445 /* Slightly bizarrely, the library seems to silently ignore entity
3446  * definitions for predefined entities, even when they are wrong.  The
3447  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3448  * to happen, so this is currently treated as acceptable.
3449  */
3450 START_TEST(test_predefined_entity_redefinition) {
3451   const char *text = "<!DOCTYPE doc [\n"
3452                      "<!ENTITY apos 'foo'>\n"
3453                      "]>\n"
3454                      "<doc>&apos;</doc>";
3455   run_character_check(text, XCS("'"));
3456 }
3457 END_TEST
3458 
3459 /* Test that the parser stops processing the DTD after an unresolved
3460  * parameter entity is encountered.
3461  */
3462 START_TEST(test_dtd_stop_processing) {
3463   const char *text = "<!DOCTYPE doc [\n"
3464                      "%foo;\n"
3465                      "<!ENTITY bar 'bas'>\n"
3466                      "]><doc/>";
3467 
3468   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3469   init_dummy_handlers();
3470   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3471       == XML_STATUS_ERROR)
3472     xml_failure(g_parser);
3473   if (get_dummy_handler_flags() != 0)
3474     fail("DTD processing still going after undefined PE");
3475 }
3476 END_TEST
3477 
3478 /* Test public notations with no system ID */
3479 START_TEST(test_public_notation_no_sysid) {
3480   const char *text = "<!DOCTYPE doc [\n"
3481                      "<!NOTATION note PUBLIC 'foo'>\n"
3482                      "<!ELEMENT doc EMPTY>\n"
3483                      "]>\n<doc/>";
3484 
3485   init_dummy_handlers();
3486   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3487   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3488       == XML_STATUS_ERROR)
3489     xml_failure(g_parser);
3490   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3491     fail("Notation declaration handler not called");
3492 }
3493 END_TEST
3494 
3495 START_TEST(test_nested_groups) {
3496   const char *text
3497       = "<!DOCTYPE doc [\n"
3498         "<!ELEMENT doc "
3499         /* Sixteen elements per line */
3500         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3501         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3502         "))))))))))))))))))))))))))))))))>\n"
3503         "<!ELEMENT e EMPTY>"
3504         "]>\n"
3505         "<doc><e/></doc>";
3506   CharData storage;
3507 
3508   CharData_Init(&storage);
3509   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3510   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3511   XML_SetUserData(g_parser, &storage);
3512   init_dummy_handlers();
3513   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3514       == XML_STATUS_ERROR)
3515     xml_failure(g_parser);
3516   CharData_CheckXMLChars(&storage, XCS("doce"));
3517   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3518     fail("Element handler not fired");
3519 }
3520 END_TEST
3521 
3522 START_TEST(test_group_choice) {
3523   const char *text = "<!DOCTYPE doc [\n"
3524                      "<!ELEMENT doc (a|b|c)+>\n"
3525                      "<!ELEMENT a EMPTY>\n"
3526                      "<!ELEMENT b (#PCDATA)>\n"
3527                      "<!ELEMENT c ANY>\n"
3528                      "]>\n"
3529                      "<doc>\n"
3530                      "<a/>\n"
3531                      "<b attr='foo'>This is a foo</b>\n"
3532                      "<c></c>\n"
3533                      "</doc>\n";
3534 
3535   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3536   init_dummy_handlers();
3537   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3538       == XML_STATUS_ERROR)
3539     xml_failure(g_parser);
3540   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3541     fail("Element handler flag not raised");
3542 }
3543 END_TEST
3544 
3545 START_TEST(test_standalone_parameter_entity) {
3546   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3547                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3548                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3549                      "%entity;\n"
3550                      "]>\n"
3551                      "<doc></doc>";
3552   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3553 
3554   XML_SetUserData(g_parser, dtd_data);
3555   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3556   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3557   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3558       == XML_STATUS_ERROR)
3559     xml_failure(g_parser);
3560 }
3561 END_TEST
3562 
3563 /* Test skipping of parameter entity in an external DTD */
3564 /* Derived from ibm/invalid/P69/ibm69i01.xml */
3565 START_TEST(test_skipped_parameter_entity) {
3566   const char *text = "<?xml version='1.0'?>\n"
3567                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3568                      "<!ELEMENT root (#PCDATA|a)* >\n"
3569                      "]>\n"
3570                      "<root></root>";
3571   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3572 
3573   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3574   XML_SetUserData(g_parser, &dtd_data);
3575   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3576   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3577   init_dummy_handlers();
3578   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3579       == XML_STATUS_ERROR)
3580     xml_failure(g_parser);
3581   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3582     fail("Skip handler not executed");
3583 }
3584 END_TEST
3585 
3586 /* Test recursive parameter entity definition rejected in external DTD */
3587 START_TEST(test_recursive_external_parameter_entity) {
3588   const char *text = "<?xml version='1.0'?>\n"
3589                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3590                      "<!ELEMENT root (#PCDATA|a)* >\n"
3591                      "]>\n"
3592                      "<root></root>";
3593   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3594                         "Recursive external parameter entity not faulted", NULL,
3595                         XML_ERROR_RECURSIVE_ENTITY_REF};
3596 
3597   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3598   XML_SetUserData(g_parser, &dtd_data);
3599   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3600   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3601                  "Recursive external parameter not spotted");
3602 }
3603 END_TEST
3604 
3605 /* Test undefined parameter entity in external entity handler */
3606 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3607   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3608                      "<doc></doc>\n";
3609 
3610   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3611   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3612   XML_SetUserData(g_parser, NULL);
3613   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3614       == XML_STATUS_ERROR)
3615     xml_failure(g_parser);
3616 
3617   /* Now repeat without the external entity ref handler invoking
3618    * another copy of itself.
3619    */
3620   XML_ParserReset(g_parser, NULL);
3621   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3622   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3623   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3624   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3625       == XML_STATUS_ERROR)
3626     xml_failure(g_parser);
3627 }
3628 END_TEST
3629 
3630 /* Test suspending the parse on receiving an XML declaration works */
3631 START_TEST(test_suspend_xdecl) {
3632   const char *text = long_character_data_text;
3633 
3634   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3635   XML_SetUserData(g_parser, g_parser);
3636   g_resumable = XML_TRUE;
3637   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3638       != XML_STATUS_SUSPENDED)
3639     xml_failure(g_parser);
3640   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3641     xml_failure(g_parser);
3642   /* Attempt to start a new parse while suspended */
3643   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3644       != XML_STATUS_ERROR)
3645     fail("Attempt to parse while suspended not faulted");
3646   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3647     fail("Suspended parse not faulted with correct error");
3648 }
3649 END_TEST
3650 
3651 /* Test aborting the parse in an epilog works */
3652 START_TEST(test_abort_epilog) {
3653   const char *text = "<doc></doc>\n\r\n";
3654   XML_Char trigger_char = XCS('\r');
3655 
3656   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3657   XML_SetUserData(g_parser, &trigger_char);
3658   g_resumable = XML_FALSE;
3659   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3660       != XML_STATUS_ERROR)
3661     fail("Abort not triggered");
3662   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3663     xml_failure(g_parser);
3664 }
3665 END_TEST
3666 
3667 /* Test a different code path for abort in the epilog */
3668 START_TEST(test_abort_epilog_2) {
3669   const char *text = "<doc></doc>\n";
3670   XML_Char trigger_char = XCS('\n');
3671 
3672   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3673   XML_SetUserData(g_parser, &trigger_char);
3674   g_resumable = XML_FALSE;
3675   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3676 }
3677 END_TEST
3678 
3679 /* Test suspension from the epilog */
3680 START_TEST(test_suspend_epilog) {
3681   const char *text = "<doc></doc>\n";
3682   XML_Char trigger_char = XCS('\n');
3683 
3684   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3685   XML_SetUserData(g_parser, &trigger_char);
3686   g_resumable = XML_TRUE;
3687   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3688       != XML_STATUS_SUSPENDED)
3689     xml_failure(g_parser);
3690 }
3691 END_TEST
3692 
3693 START_TEST(test_suspend_in_sole_empty_tag) {
3694   const char *text = "<doc/>";
3695   enum XML_Status rc;
3696 
3697   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3698   XML_SetUserData(g_parser, g_parser);
3699   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3700   if (rc == XML_STATUS_ERROR)
3701     xml_failure(g_parser);
3702   else if (rc != XML_STATUS_SUSPENDED)
3703     fail("Suspend not triggered");
3704   rc = XML_ResumeParser(g_parser);
3705   if (rc == XML_STATUS_ERROR)
3706     xml_failure(g_parser);
3707   else if (rc != XML_STATUS_OK)
3708     fail("Resume failed");
3709 }
3710 END_TEST
3711 
3712 START_TEST(test_unfinished_epilog) {
3713   const char *text = "<doc></doc><";
3714 
3715   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3716                  "Incomplete epilog entry not faulted");
3717 }
3718 END_TEST
3719 
3720 START_TEST(test_partial_char_in_epilog) {
3721   const char *text = "<doc></doc>\xe2\x82";
3722 
3723   /* First check that no fault is raised if the parse is not finished */
3724   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3725       == XML_STATUS_ERROR)
3726     xml_failure(g_parser);
3727   /* Now check that it is faulted once we finish */
3728   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3729     fail("Partial character in epilog not faulted");
3730   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3731     xml_failure(g_parser);
3732 }
3733 END_TEST
3734 
3735 /* Test resuming a parse suspended in entity substitution */
3736 START_TEST(test_suspend_resume_internal_entity) {
3737   const char *text
3738       = "<!DOCTYPE doc [\n"
3739         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3740         "]>\n"
3741         "<doc>&foo;</doc>\n";
3742   const XML_Char *expected1 = XCS("Hi");
3743   const XML_Char *expected2 = XCS("HiHo");
3744   CharData storage;
3745 
3746   CharData_Init(&storage);
3747   XML_SetStartElementHandler(g_parser, start_element_suspender);
3748   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3749   XML_SetUserData(g_parser, &storage);
3750   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3751   // we won't know exactly how much input we actually managed to give Expat.
3752   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3753       != XML_STATUS_SUSPENDED)
3754     xml_failure(g_parser);
3755   CharData_CheckXMLChars(&storage, XCS(""));
3756   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3757     xml_failure(g_parser);
3758   CharData_CheckXMLChars(&storage, expected1);
3759   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3760     xml_failure(g_parser);
3761   CharData_CheckXMLChars(&storage, expected2);
3762 }
3763 END_TEST
3764 
3765 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3766   const char *const text
3767       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3768         "<"
3769         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3770         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3771         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3772         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3773         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3774         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3775         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3776         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3777         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3778         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3779         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3780         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3781         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3782         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3783         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3784         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3785         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3786         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3787         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3788         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3789         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3790         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3791         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3792         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3793         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3794         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3795         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3796         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3797         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3798         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3799         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3800         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3801         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3802         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3803         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3804         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3805         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3806         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3807         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3808         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3809         "/>"
3810         "</b></a>";
3811   const size_t firstChunkSizeBytes = 54;
3812 
3813   XML_Parser parser = XML_ParserCreate(NULL);
3814   XML_SetUserData(parser, parser);
3815   XML_SetCommentHandler(parser, suspending_comment_handler);
3816 
3817   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3818       != XML_STATUS_SUSPENDED)
3819     xml_failure(parser);
3820   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3821     xml_failure(parser);
3822   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3823                               (int)(strlen(text) - firstChunkSizeBytes),
3824                               XML_TRUE)
3825       != XML_STATUS_OK)
3826     xml_failure(parser);
3827   XML_ParserFree(parser);
3828 }
3829 END_TEST
3830 
3831 /* Test syntax error is caught at parse resumption */
3832 START_TEST(test_resume_entity_with_syntax_error) {
3833   const char *text = "<!DOCTYPE doc [\n"
3834                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3835                      "]>\n"
3836                      "<doc>&foo;</doc>\n";
3837 
3838   XML_SetStartElementHandler(g_parser, start_element_suspender);
3839   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3840       != XML_STATUS_SUSPENDED)
3841     xml_failure(g_parser);
3842   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3843     fail("Syntax error in entity not faulted");
3844   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3845     xml_failure(g_parser);
3846 }
3847 END_TEST
3848 
3849 /* Test suspending and resuming in a parameter entity substitution */
3850 START_TEST(test_suspend_resume_parameter_entity) {
3851   const char *text = "<!DOCTYPE doc [\n"
3852                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3853                      "%foo;\n"
3854                      "]>\n"
3855                      "<doc>Hello, world</doc>";
3856   const XML_Char *expected = XCS("Hello, world");
3857   CharData storage;
3858 
3859   CharData_Init(&storage);
3860   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3861   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3862   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3863   XML_SetUserData(g_parser, &storage);
3864   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3865       != XML_STATUS_SUSPENDED)
3866     xml_failure(g_parser);
3867   CharData_CheckXMLChars(&storage, XCS(""));
3868   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3869     xml_failure(g_parser);
3870   CharData_CheckXMLChars(&storage, expected);
3871 }
3872 END_TEST
3873 
3874 /* Test attempting to use parser after an error is faulted */
3875 START_TEST(test_restart_on_error) {
3876   const char *text = "<$doc><doc></doc>";
3877 
3878   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3879       != XML_STATUS_ERROR)
3880     fail("Invalid tag name not faulted");
3881   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3882     xml_failure(g_parser);
3883   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3884     fail("Restarting invalid parse not faulted");
3885   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3886     xml_failure(g_parser);
3887 }
3888 END_TEST
3889 
3890 /* Test that angle brackets in an attribute default value are faulted */
3891 START_TEST(test_reject_lt_in_attribute_value) {
3892   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3893                      "<doc></doc>";
3894 
3895   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3896                  "Bad attribute default not faulted");
3897 }
3898 END_TEST
3899 
3900 START_TEST(test_reject_unfinished_param_in_att_value) {
3901   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3902                      "<doc></doc>";
3903 
3904   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3905                  "Bad attribute default not faulted");
3906 }
3907 END_TEST
3908 
3909 START_TEST(test_trailing_cr_in_att_value) {
3910   const char *text = "<doc a='value\r'/>";
3911 
3912   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3913       == XML_STATUS_ERROR)
3914     xml_failure(g_parser);
3915 }
3916 END_TEST
3917 
3918 /* Try parsing a general entity within a parameter entity in a
3919  * standalone internal DTD.  Covers a corner case in the parser.
3920  */
3921 START_TEST(test_standalone_internal_entity) {
3922   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3923                      "<!DOCTYPE doc [\n"
3924                      "  <!ELEMENT doc (#PCDATA)>\n"
3925                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3926                      "  <!ENTITY ge 'AttDefaultValue'>\n"
3927                      "  %pe;\n"
3928                      "]>\n"
3929                      "<doc att2='any'/>";
3930 
3931   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3932   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3933       == XML_STATUS_ERROR)
3934     xml_failure(g_parser);
3935 }
3936 END_TEST
3937 
3938 /* Test that a reference to an unknown external entity is skipped */
3939 START_TEST(test_skipped_external_entity) {
3940   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3941                      "<doc></doc>\n";
3942   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3943                        "<!ENTITY % e2 '%e1;'>\n",
3944                        NULL, NULL};
3945 
3946   XML_SetUserData(g_parser, &test_data);
3947   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3948   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3949   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3950       == XML_STATUS_ERROR)
3951     xml_failure(g_parser);
3952 }
3953 END_TEST
3954 
3955 /* Test a different form of unknown external entity */
3956 START_TEST(test_skipped_null_loaded_ext_entity) {
3957   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3958                      "<doc />";
3959   ExtHdlrData test_data
3960       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3961          "<!ENTITY % pe2 '%pe1;'>\n"
3962          "%pe2;\n",
3963          external_entity_null_loader};
3964 
3965   XML_SetUserData(g_parser, &test_data);
3966   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3967   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3968   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3969       == XML_STATUS_ERROR)
3970     xml_failure(g_parser);
3971 }
3972 END_TEST
3973 
3974 START_TEST(test_skipped_unloaded_ext_entity) {
3975   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3976                      "<doc />";
3977   ExtHdlrData test_data
3978       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3979          "<!ENTITY % pe2 '%pe1;'>\n"
3980          "%pe2;\n",
3981          NULL};
3982 
3983   XML_SetUserData(g_parser, &test_data);
3984   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3985   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3986   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3987       == XML_STATUS_ERROR)
3988     xml_failure(g_parser);
3989 }
3990 END_TEST
3991 
3992 /* Test that a parameter entity value ending with a carriage return
3993  * has it translated internally into a newline.
3994  */
3995 START_TEST(test_param_entity_with_trailing_cr) {
3996 #define PARAM_ENTITY_NAME "pe"
3997 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3998   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3999                      "<doc/>";
4000   ExtTest test_data
4001       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4002          "%" PARAM_ENTITY_NAME ";\n",
4003          NULL, NULL};
4004 
4005   XML_SetUserData(g_parser, &test_data);
4006   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4007   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4008   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4009   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4010                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4011   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4012       == XML_STATUS_ERROR)
4013     xml_failure(g_parser);
4014   int entity_match_flag = get_param_entity_match_flag();
4015   if (entity_match_flag == ENTITY_MATCH_FAIL)
4016     fail("Parameter entity CR->NEWLINE conversion failed");
4017   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4018     fail("Parameter entity not parsed");
4019 }
4020 #undef PARAM_ENTITY_NAME
4021 #undef PARAM_ENTITY_CORE_VALUE
4022 END_TEST
4023 
4024 START_TEST(test_invalid_character_entity) {
4025   const char *text = "<!DOCTYPE doc [\n"
4026                      "  <!ENTITY entity '&#x110000;'>\n"
4027                      "]>\n"
4028                      "<doc>&entity;</doc>";
4029 
4030   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4031                  "Out of range character reference not faulted");
4032 }
4033 END_TEST
4034 
4035 START_TEST(test_invalid_character_entity_2) {
4036   const char *text = "<!DOCTYPE doc [\n"
4037                      "  <!ENTITY entity '&#xg0;'>\n"
4038                      "]>\n"
4039                      "<doc>&entity;</doc>";
4040 
4041   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4042                  "Out of range character reference not faulted");
4043 }
4044 END_TEST
4045 
4046 START_TEST(test_invalid_character_entity_3) {
4047   const char text[] =
4048       /* <!DOCTYPE doc [\n */
4049       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4050       /* U+0E04 = KHO KHWAI
4051        * U+0E08 = CHO CHAN */
4052       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4053       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4054       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4055       /* ]>\n */
4056       "\0]\0>\0\n"
4057       /* <doc>&entity;</doc> */
4058       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4059 
4060   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4061       != XML_STATUS_ERROR)
4062     fail("Invalid start of entity name not faulted");
4063   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4064     xml_failure(g_parser);
4065 }
4066 END_TEST
4067 
4068 START_TEST(test_invalid_character_entity_4) {
4069   const char *text = "<!DOCTYPE doc [\n"
4070                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4071                      "]>\n"
4072                      "<doc>&entity;</doc>";
4073 
4074   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4075                  "Out of range character reference not faulted");
4076 }
4077 END_TEST
4078 
4079 /* Test that processing instructions are picked up by a default handler */
4080 START_TEST(test_pi_handled_in_default) {
4081   const char *text = "<?test processing instruction?>\n<doc/>";
4082   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4083   CharData storage;
4084 
4085   CharData_Init(&storage);
4086   XML_SetDefaultHandler(g_parser, accumulate_characters);
4087   XML_SetUserData(g_parser, &storage);
4088   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4089       == XML_STATUS_ERROR)
4090     xml_failure(g_parser);
4091   CharData_CheckXMLChars(&storage, expected);
4092 }
4093 END_TEST
4094 
4095 /* Test that comments are picked up by a default handler */
4096 START_TEST(test_comment_handled_in_default) {
4097   const char *text = "<!-- This is a comment -->\n<doc/>";
4098   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4099   CharData storage;
4100 
4101   CharData_Init(&storage);
4102   XML_SetDefaultHandler(g_parser, accumulate_characters);
4103   XML_SetUserData(g_parser, &storage);
4104   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4105       == XML_STATUS_ERROR)
4106     xml_failure(g_parser);
4107   CharData_CheckXMLChars(&storage, expected);
4108 }
4109 END_TEST
4110 
4111 /* Test PIs that look almost but not quite like XML declarations */
4112 START_TEST(test_pi_yml) {
4113   const char *text = "<?yml something like data?><doc/>";
4114   const XML_Char *expected = XCS("yml: something like data\n");
4115   CharData storage;
4116 
4117   CharData_Init(&storage);
4118   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4119   XML_SetUserData(g_parser, &storage);
4120   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4121       == XML_STATUS_ERROR)
4122     xml_failure(g_parser);
4123   CharData_CheckXMLChars(&storage, expected);
4124 }
4125 END_TEST
4126 
4127 START_TEST(test_pi_xnl) {
4128   const char *text = "<?xnl nothing like data?><doc/>";
4129   const XML_Char *expected = XCS("xnl: nothing like data\n");
4130   CharData storage;
4131 
4132   CharData_Init(&storage);
4133   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4134   XML_SetUserData(g_parser, &storage);
4135   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4136       == XML_STATUS_ERROR)
4137     xml_failure(g_parser);
4138   CharData_CheckXMLChars(&storage, expected);
4139 }
4140 END_TEST
4141 
4142 START_TEST(test_pi_xmm) {
4143   const char *text = "<?xmm everything like data?><doc/>";
4144   const XML_Char *expected = XCS("xmm: everything like data\n");
4145   CharData storage;
4146 
4147   CharData_Init(&storage);
4148   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4149   XML_SetUserData(g_parser, &storage);
4150   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4151       == XML_STATUS_ERROR)
4152     xml_failure(g_parser);
4153   CharData_CheckXMLChars(&storage, expected);
4154 }
4155 END_TEST
4156 
4157 START_TEST(test_utf16_pi) {
4158   const char text[] =
4159       /* <?{KHO KHWAI}{CHO CHAN}?>
4160        * where {KHO KHWAI} = U+0E04
4161        * and   {CHO CHAN}  = U+0E08
4162        */
4163       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4164       /* <q/> */
4165       "<\0q\0/\0>\0";
4166 #ifdef XML_UNICODE
4167   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4168 #else
4169   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4170 #endif
4171   CharData storage;
4172 
4173   CharData_Init(&storage);
4174   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4175   XML_SetUserData(g_parser, &storage);
4176   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4177       == XML_STATUS_ERROR)
4178     xml_failure(g_parser);
4179   CharData_CheckXMLChars(&storage, expected);
4180 }
4181 END_TEST
4182 
4183 START_TEST(test_utf16_be_pi) {
4184   const char text[] =
4185       /* <?{KHO KHWAI}{CHO CHAN}?>
4186        * where {KHO KHWAI} = U+0E04
4187        * and   {CHO CHAN}  = U+0E08
4188        */
4189       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4190       /* <q/> */
4191       "\0<\0q\0/\0>";
4192 #ifdef XML_UNICODE
4193   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4194 #else
4195   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4196 #endif
4197   CharData storage;
4198 
4199   CharData_Init(&storage);
4200   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4201   XML_SetUserData(g_parser, &storage);
4202   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4203       == XML_STATUS_ERROR)
4204     xml_failure(g_parser);
4205   CharData_CheckXMLChars(&storage, expected);
4206 }
4207 END_TEST
4208 
4209 /* Test that comments can be picked up and translated */
4210 START_TEST(test_utf16_be_comment) {
4211   const char text[] =
4212       /* <!-- Comment A --> */
4213       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4214       /* <doc/> */
4215       "\0<\0d\0o\0c\0/\0>";
4216   const XML_Char *expected = XCS(" Comment A ");
4217   CharData storage;
4218 
4219   CharData_Init(&storage);
4220   XML_SetCommentHandler(g_parser, accumulate_comment);
4221   XML_SetUserData(g_parser, &storage);
4222   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4223       == XML_STATUS_ERROR)
4224     xml_failure(g_parser);
4225   CharData_CheckXMLChars(&storage, expected);
4226 }
4227 END_TEST
4228 
4229 START_TEST(test_utf16_le_comment) {
4230   const char text[] =
4231       /* <!-- Comment B --> */
4232       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4233       /* <doc/> */
4234       "<\0d\0o\0c\0/\0>\0";
4235   const XML_Char *expected = XCS(" Comment B ");
4236   CharData storage;
4237 
4238   CharData_Init(&storage);
4239   XML_SetCommentHandler(g_parser, accumulate_comment);
4240   XML_SetUserData(g_parser, &storage);
4241   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4242       == XML_STATUS_ERROR)
4243     xml_failure(g_parser);
4244   CharData_CheckXMLChars(&storage, expected);
4245 }
4246 END_TEST
4247 
4248 /* Test that the unknown encoding handler with map entries that expect
4249  * conversion but no conversion function is faulted
4250  */
4251 START_TEST(test_missing_encoding_conversion_fn) {
4252   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4253                      "<doc>\x81</doc>";
4254 
4255   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4256   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4257    * character introducing a two-byte sequence.  For this, it
4258    * requires a convert function.  The above function call doesn't
4259    * pass one through, so when BadEncodingHandler actually gets
4260    * called it should supply an invalid encoding.
4261    */
4262   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4263                  "Encoding with missing convert() not faulted");
4264 }
4265 END_TEST
4266 
4267 START_TEST(test_failing_encoding_conversion_fn) {
4268   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4269                      "<doc>\x81</doc>";
4270 
4271   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4272   /* BadEncodingHandler sets up an encoding with every top-bit-set
4273    * character introducing a two-byte sequence.  For this, it
4274    * requires a convert function.  The above function call passes
4275    * one that insists all possible sequences are invalid anyway.
4276    */
4277   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4278                  "Encoding with failing convert() not faulted");
4279 }
4280 END_TEST
4281 
4282 /* Test unknown encoding conversions */
4283 START_TEST(test_unknown_encoding_success) {
4284   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4285                      /* Equivalent to <eoc>Hello, world</eoc> */
4286                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4287 
4288   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4289   run_character_check(text, XCS("Hello, world"));
4290 }
4291 END_TEST
4292 
4293 /* Test bad name character in unknown encoding */
4294 START_TEST(test_unknown_encoding_bad_name) {
4295   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4296                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4297 
4298   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4299   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4300                  "Bad name start in unknown encoding not faulted");
4301 }
4302 END_TEST
4303 
4304 /* Test bad mid-name character in unknown encoding */
4305 START_TEST(test_unknown_encoding_bad_name_2) {
4306   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4307                      "<d\xffoc>Hello, world</d\xffoc>";
4308 
4309   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4310   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4311                  "Bad name in unknown encoding not faulted");
4312 }
4313 END_TEST
4314 
4315 /* Test element name that is long enough to fill the conversion buffer
4316  * in an unknown encoding, finishing with an encoded character.
4317  */
4318 START_TEST(test_unknown_encoding_long_name_1) {
4319   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4320                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4321                      "Hi"
4322                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4323   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4324   CharData storage;
4325 
4326   CharData_Init(&storage);
4327   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4328   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4329   XML_SetUserData(g_parser, &storage);
4330   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4331       == XML_STATUS_ERROR)
4332     xml_failure(g_parser);
4333   CharData_CheckXMLChars(&storage, expected);
4334 }
4335 END_TEST
4336 
4337 /* Test element name that is long enough to fill the conversion buffer
4338  * in an unknown encoding, finishing with an simple character.
4339  */
4340 START_TEST(test_unknown_encoding_long_name_2) {
4341   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4342                      "<abcdefghabcdefghabcdefghijklmnop>"
4343                      "Hi"
4344                      "</abcdefghabcdefghabcdefghijklmnop>";
4345   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4346   CharData storage;
4347 
4348   CharData_Init(&storage);
4349   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4350   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4351   XML_SetUserData(g_parser, &storage);
4352   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4353       == XML_STATUS_ERROR)
4354     xml_failure(g_parser);
4355   CharData_CheckXMLChars(&storage, expected);
4356 }
4357 END_TEST
4358 
4359 START_TEST(test_invalid_unknown_encoding) {
4360   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4361                      "<doc>Hello world</doc>";
4362 
4363   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4364   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4365                  "Invalid unknown encoding not faulted");
4366 }
4367 END_TEST
4368 
4369 START_TEST(test_unknown_ascii_encoding_ok) {
4370   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4371                      "<doc>Hello, world</doc>";
4372 
4373   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4374   run_character_check(text, XCS("Hello, world"));
4375 }
4376 END_TEST
4377 
4378 START_TEST(test_unknown_ascii_encoding_fail) {
4379   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4380                      "<doc>Hello, \x80 world</doc>";
4381 
4382   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4383   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4384                  "Invalid character not faulted");
4385 }
4386 END_TEST
4387 
4388 START_TEST(test_unknown_encoding_invalid_length) {
4389   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4390                      "<doc>Hello, world</doc>";
4391 
4392   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4393   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4394                  "Invalid unknown encoding not faulted");
4395 }
4396 END_TEST
4397 
4398 START_TEST(test_unknown_encoding_invalid_topbit) {
4399   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4400                      "<doc>Hello, world</doc>";
4401 
4402   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4403   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4404                  "Invalid unknown encoding not faulted");
4405 }
4406 END_TEST
4407 
4408 START_TEST(test_unknown_encoding_invalid_surrogate) {
4409   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4410                      "<doc>Hello, \x82 world</doc>";
4411 
4412   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4413   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4414                  "Invalid unknown encoding not faulted");
4415 }
4416 END_TEST
4417 
4418 START_TEST(test_unknown_encoding_invalid_high) {
4419   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4420                      "<doc>Hello, world</doc>";
4421 
4422   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4423   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4424                  "Invalid unknown encoding not faulted");
4425 }
4426 END_TEST
4427 
4428 START_TEST(test_unknown_encoding_invalid_attr_value) {
4429   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4430                      "<doc attr='\xff\x30'/>";
4431 
4432   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4433   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4434                  "Invalid attribute valid not faulted");
4435 }
4436 END_TEST
4437 
4438 /* Test an external entity parser set to use latin-1 detects UTF-16
4439  * BOMs correctly.
4440  */
4441 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
4442 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4443   const char *text = "<!DOCTYPE doc [\n"
4444                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4445                      "]>\n"
4446                      "<doc>&en;</doc>";
4447   ExtTest2 test_data
4448       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4449          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4450           *   0x4c = L and 0x20 is a space
4451           */
4452          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4453 #ifdef XML_UNICODE
4454   const XML_Char *expected = XCS("\x00ff\x00feL ");
4455 #else
4456   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4457   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4458 #endif
4459   CharData storage;
4460 
4461   CharData_Init(&storage);
4462   test_data.storage = &storage;
4463   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4464   XML_SetUserData(g_parser, &test_data);
4465   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4466   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4467       == XML_STATUS_ERROR)
4468     xml_failure(g_parser);
4469   CharData_CheckXMLChars(&storage, expected);
4470 }
4471 END_TEST
4472 
4473 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4474   const char *text = "<!DOCTYPE doc [\n"
4475                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4476                      "]>\n"
4477                      "<doc>&en;</doc>";
4478   ExtTest2 test_data
4479       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4480          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4481           *   0x4c = L and 0x20 is a space
4482           */
4483          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4484 #ifdef XML_UNICODE
4485   const XML_Char *expected = XCS("\x00fe\x00ff L");
4486 #else
4487   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4488   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4489 #endif
4490   CharData storage;
4491 
4492   CharData_Init(&storage);
4493   test_data.storage = &storage;
4494   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4495   XML_SetUserData(g_parser, &test_data);
4496   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4497   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4498       == XML_STATUS_ERROR)
4499     xml_failure(g_parser);
4500   CharData_CheckXMLChars(&storage, expected);
4501 }
4502 END_TEST
4503 
4504 /* Parsing the full buffer rather than a byte at a time makes a
4505  * difference to the encoding scanning code, so repeat the above tests
4506  * without breaking them down by byte.
4507  */
4508 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4509   const char *text = "<!DOCTYPE doc [\n"
4510                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4511                      "]>\n"
4512                      "<doc>&en;</doc>";
4513   ExtTest2 test_data
4514       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4515          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4516           *   0x4c = L and 0x20 is a space
4517           */
4518          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4519 #ifdef XML_UNICODE
4520   const XML_Char *expected = XCS("\x00ff\x00feL ");
4521 #else
4522   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4523   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4524 #endif
4525   CharData storage;
4526 
4527   CharData_Init(&storage);
4528   test_data.storage = &storage;
4529   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4530   XML_SetUserData(g_parser, &test_data);
4531   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4532   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4533       == XML_STATUS_ERROR)
4534     xml_failure(g_parser);
4535   CharData_CheckXMLChars(&storage, expected);
4536 }
4537 END_TEST
4538 
4539 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4540   const char *text = "<!DOCTYPE doc [\n"
4541                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4542                      "]>\n"
4543                      "<doc>&en;</doc>";
4544   ExtTest2 test_data
4545       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4546          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4547           *   0x4c = L and 0x20 is a space
4548           */
4549          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4550 #ifdef XML_UNICODE
4551   const XML_Char *expected = XCS("\x00fe\x00ff L");
4552 #else
4553   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4554   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4555 #endif
4556   CharData storage;
4557 
4558   CharData_Init(&storage);
4559   test_data.storage = &storage;
4560   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4561   XML_SetUserData(g_parser, &test_data);
4562   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4563   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4564       == XML_STATUS_ERROR)
4565     xml_failure(g_parser);
4566   CharData_CheckXMLChars(&storage, expected);
4567 }
4568 END_TEST
4569 
4570 /* Test little-endian UTF-16 given an explicit big-endian encoding */
4571 START_TEST(test_ext_entity_utf16_be) {
4572   const char *text = "<!DOCTYPE doc [\n"
4573                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4574                      "]>\n"
4575                      "<doc>&en;</doc>";
4576   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4577 #ifdef XML_UNICODE
4578   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4579 #else
4580   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4581                                  "\xe6\x94\x80"   /* U+6500 */
4582                                  "\xe2\xbc\x80"   /* U+2F00 */
4583                                  "\xe3\xb8\x80"); /* U+3E00 */
4584 #endif
4585   CharData storage;
4586 
4587   CharData_Init(&storage);
4588   test_data.storage = &storage;
4589   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4590   XML_SetUserData(g_parser, &test_data);
4591   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4592   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4593       == XML_STATUS_ERROR)
4594     xml_failure(g_parser);
4595   CharData_CheckXMLChars(&storage, expected);
4596 }
4597 END_TEST
4598 
4599 /* Test big-endian UTF-16 given an explicit little-endian encoding */
4600 START_TEST(test_ext_entity_utf16_le) {
4601   const char *text = "<!DOCTYPE doc [\n"
4602                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4603                      "]>\n"
4604                      "<doc>&en;</doc>";
4605   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4606 #ifdef XML_UNICODE
4607   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4608 #else
4609   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4610                                  "\xe6\x94\x80"   /* U+6500 */
4611                                  "\xe2\xbc\x80"   /* U+2F00 */
4612                                  "\xe3\xb8\x80"); /* U+3E00 */
4613 #endif
4614   CharData storage;
4615 
4616   CharData_Init(&storage);
4617   test_data.storage = &storage;
4618   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4619   XML_SetUserData(g_parser, &test_data);
4620   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4621   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4622       == XML_STATUS_ERROR)
4623     xml_failure(g_parser);
4624   CharData_CheckXMLChars(&storage, expected);
4625 }
4626 END_TEST
4627 
4628 /* Test little-endian UTF-16 given no explicit encoding.
4629  * The existing default encoding (UTF-8) is assumed to hold without a
4630  * BOM to contradict it, so the entity value will in fact provoke an
4631  * error because 0x00 is not a valid XML character.  We parse the
4632  * whole buffer in one go rather than feeding it in byte by byte to
4633  * exercise different code paths in the initial scanning routines.
4634  */
4635 START_TEST(test_ext_entity_utf16_unknown) {
4636   const char *text = "<!DOCTYPE doc [\n"
4637                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4638                      "]>\n"
4639                      "<doc>&en;</doc>";
4640   ExtFaults2 test_data
4641       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4642          XML_ERROR_INVALID_TOKEN};
4643 
4644   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4645   XML_SetUserData(g_parser, &test_data);
4646   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4647                  "Invalid character should not have been accepted");
4648 }
4649 END_TEST
4650 
4651 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
4652 START_TEST(test_ext_entity_utf8_non_bom) {
4653   const char *text = "<!DOCTYPE doc [\n"
4654                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4655                      "]>\n"
4656                      "<doc>&en;</doc>";
4657   ExtTest2 test_data
4658       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4659          3, NULL, NULL};
4660 #ifdef XML_UNICODE
4661   const XML_Char *expected = XCS("\xfec0");
4662 #else
4663   const XML_Char *expected = XCS("\xef\xbb\x80");
4664 #endif
4665   CharData storage;
4666 
4667   CharData_Init(&storage);
4668   test_data.storage = &storage;
4669   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4670   XML_SetUserData(g_parser, &test_data);
4671   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4672   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4673       == XML_STATUS_ERROR)
4674     xml_failure(g_parser);
4675   CharData_CheckXMLChars(&storage, expected);
4676 }
4677 END_TEST
4678 
4679 /* Test that UTF-8 in a CDATA section is correctly passed through */
4680 START_TEST(test_utf8_in_cdata_section) {
4681   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4682 #ifdef XML_UNICODE
4683   const XML_Char *expected = XCS("one \x00e9 two");
4684 #else
4685   const XML_Char *expected = XCS("one \xc3\xa9 two");
4686 #endif
4687 
4688   run_character_check(text, expected);
4689 }
4690 END_TEST
4691 
/* Test that UTF-8 around a lone ']' in a CDATA section is passed through */
4693 START_TEST(test_utf8_in_cdata_section_2) {
4694   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4695 #ifdef XML_UNICODE
4696   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4697 #else
4698   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4699 #endif
4700 
4701   run_character_check(text, expected);
4702 }
4703 END_TEST
4704 
4705 START_TEST(test_utf8_in_start_tags) {
4706   struct test_case {
4707     bool goodName;
4708     bool goodNameStart;
4709     const char *tagName;
4710   };
4711 
4712   // The idea with the tests below is this:
4713   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4714   // go to isNever and are hence not a concern.
4715   //
4716   // We start with a character that is a valid name character
4717   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4718   // single bits at places where (1) the result leaves the UTF-8 encoding space
4719   // and (2) we stay in the same n-byte sequence family.
4720   //
4721   // The flipped bits are highlighted in angle brackets in comments,
4722   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4723   // the most significant bit to 1 to leave UTF-8 encoding space.
4724   struct test_case cases[] = {
4725       // 1-byte UTF-8: [0xxx xxxx]
4726       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4727       {false, false, "\xBA"}, // [<1>011 1010]
4728       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4729       {false, false, "\xB9"}, // [<1>011 1001]
4730 
4731       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4732       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4733                                   // Arabic small waw U+06E5
4734       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4735       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4736       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4737       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4738                                   // combining char U+0301
4739       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4740       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4741       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4742 
4743       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4744       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4745                                       // Devanagari Letter A U+0905
4746       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4747       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4748       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4749       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4750       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4751       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4752                                       // combining char U+0901
4753       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4754       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4755       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4756       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4757       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4758   };
4759   const bool atNameStart[] = {true, false};
4760 
4761   size_t i = 0;
4762   char doc[1024];
4763   size_t failCount = 0;
4764 
4765   // we need all the bytes to be parsed, but we don't want the errors that can
4766   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4767   if (g_reparseDeferralEnabledDefault) {
4768     return;
4769   }
4770 
4771   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4772     size_t j = 0;
4773     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4774       const bool expectedSuccess
4775           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4776       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4777                cases[i].tagName);
4778       XML_Parser parser = XML_ParserCreate(NULL);
4779 
4780       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4781           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4782 
4783       bool success = true;
4784       if ((status == XML_STATUS_OK) != expectedSuccess) {
4785         success = false;
4786       }
4787       if ((status == XML_STATUS_ERROR)
4788           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4789         success = false;
4790       }
4791 
4792       if (! success) {
4793         fprintf(
4794             stderr,
4795             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4796             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4797             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4798         failCount++;
4799       }
4800 
4801       XML_ParserFree(parser);
4802     }
4803   }
4804 
4805   if (failCount > 0) {
4806     fail("UTF-8 regression detected");
4807   }
4808 }
4809 END_TEST
4810 
4811 /* Test trailing spaces in elements are accepted */
4812 START_TEST(test_trailing_spaces_in_elements) {
4813   const char *text = "<doc   >Hi</doc >";
4814   const XML_Char *expected = XCS("doc/doc");
4815   CharData storage;
4816 
4817   CharData_Init(&storage);
4818   XML_SetElementHandler(g_parser, record_element_start_handler,
4819                         record_element_end_handler);
4820   XML_SetUserData(g_parser, &storage);
4821   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4822       == XML_STATUS_ERROR)
4823     xml_failure(g_parser);
4824   CharData_CheckXMLChars(&storage, expected);
4825 }
4826 END_TEST
4827 
4828 START_TEST(test_utf16_attribute) {
4829   const char text[] =
4830       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4831        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4832        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4833        */
4834       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4835   const XML_Char *expected = XCS("a");
4836   CharData storage;
4837 
4838   CharData_Init(&storage);
4839   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4840   XML_SetUserData(g_parser, &storage);
4841   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4842       == XML_STATUS_ERROR)
4843     xml_failure(g_parser);
4844   CharData_CheckXMLChars(&storage, expected);
4845 }
4846 END_TEST
4847 
4848 START_TEST(test_utf16_second_attr) {
4849   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4850    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4851    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4852    */
4853   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4854                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4855   const XML_Char *expected = XCS("1");
4856   CharData storage;
4857 
4858   CharData_Init(&storage);
4859   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4860   XML_SetUserData(g_parser, &storage);
4861   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4862       == XML_STATUS_ERROR)
4863     xml_failure(g_parser);
4864   CharData_CheckXMLChars(&storage, expected);
4865 }
4866 END_TEST
4867 
4868 START_TEST(test_attr_after_solidus) {
4869   const char *text = "<doc attr1='a' / attr2='b'>";
4870 
4871   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4872 }
4873 END_TEST
4874 
4875 START_TEST(test_utf16_pe) {
4876   /* <!DOCTYPE doc [
4877    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4878    * %{KHO KHWAI}{CHO CHAN};
4879    * ]>
4880    * <doc></doc>
4881    *
4882    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4883    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4884    */
4885   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4886                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4887                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4888                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4889                       "\0%\x0e\x04\x0e\x08\0;\0\n"
4890                       "\0]\0>\0\n"
4891                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4892 #ifdef XML_UNICODE
4893   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4894 #else
4895   const XML_Char *expected
4896       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4897 #endif
4898   CharData storage;
4899 
4900   CharData_Init(&storage);
4901   XML_SetUserData(g_parser, &storage);
4902   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4903   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4904       == XML_STATUS_ERROR)
4905     xml_failure(g_parser);
4906   CharData_CheckXMLChars(&storage, expected);
4907 }
4908 END_TEST
4909 
4910 /* Test that duff attribute description keywords are rejected */
4911 START_TEST(test_bad_attr_desc_keyword) {
4912   const char *text = "<!DOCTYPE doc [\n"
4913                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4914                      "]>\n"
4915                      "<doc />";
4916 
4917   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4918                  "Bad keyword !IMPLIED not faulted");
4919 }
4920 END_TEST
4921 
4922 /* Test that an invalid attribute description keyword consisting of
4923  * UTF-16 characters with their top bytes non-zero are correctly
4924  * faulted
4925  */
4926 START_TEST(test_bad_attr_desc_keyword_utf16) {
4927   /* <!DOCTYPE d [
4928    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4929    * ]><d/>
4930    *
4931    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4932    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4933    */
4934   const char text[]
4935       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4936         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4937         "\0#\x0e\x04\x0e\x08\0>\0\n"
4938         "\0]\0>\0<\0d\0/\0>";
4939 
4940   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4941       != XML_STATUS_ERROR)
4942     fail("Invalid UTF16 attribute keyword not faulted");
4943   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4944     xml_failure(g_parser);
4945 }
4946 END_TEST
4947 
4948 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
4949  * using prefix-encoding (see above) to trigger specific code paths
4950  */
4951 START_TEST(test_bad_doctype) {
4952   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4953                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4954 
4955   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4956   expect_failure(text, XML_ERROR_SYNTAX,
4957                  "Invalid bytes in DOCTYPE not faulted");
4958 }
4959 END_TEST
4960 
4961 START_TEST(test_bad_doctype_utf8) {
4962   const char *text = "<!DOCTYPE \xDB\x25"
4963                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
4964   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4965                  "Invalid UTF-8 in DOCTYPE not faulted");
4966 }
4967 END_TEST
4968 
4969 START_TEST(test_bad_doctype_utf16) {
4970   const char text[] =
4971       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4972        *
4973        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4974        * (name character) but not a valid letter (name start character)
4975        */
4976       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4977       "\x06\xf2"
4978       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4979 
4980   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4981       != XML_STATUS_ERROR)
4982     fail("Invalid bytes in DOCTYPE not faulted");
4983   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4984     xml_failure(g_parser);
4985 }
4986 END_TEST
4987 
4988 START_TEST(test_bad_doctype_plus) {
4989   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4990                      "<1+>&foo;</1+>";
4991 
4992   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4993                  "'+' in document name not faulted");
4994 }
4995 END_TEST
4996 
4997 START_TEST(test_bad_doctype_star) {
4998   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4999                      "<1*>&foo;</1*>";
5000 
5001   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5002                  "'*' in document name not faulted");
5003 }
5004 END_TEST
5005 
5006 START_TEST(test_bad_doctype_query) {
5007   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5008                      "<1?>&foo;</1?>";
5009 
5010   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5011                  "'?' in document name not faulted");
5012 }
5013 END_TEST
5014 
5015 START_TEST(test_unknown_encoding_bad_ignore) {
5016   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5017                      "<!DOCTYPE doc SYSTEM 'foo'>"
5018                      "<doc><e>&entity;</e></doc>";
5019   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5020                      "Invalid character not faulted", XCS("prefix-conv"),
5021                      XML_ERROR_INVALID_TOKEN};
5022 
5023   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5024   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5025   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5026   XML_SetUserData(g_parser, &fault);
5027   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5028                  "Bad IGNORE section with unknown encoding not failed");
5029 }
5030 END_TEST
5031 
5032 START_TEST(test_entity_in_utf16_be_attr) {
5033   const char text[] =
5034       /* <e a='&#228; &#x00E4;'></e> */
5035       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5036       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5037 #ifdef XML_UNICODE
5038   const XML_Char *expected = XCS("\x00e4 \x00e4");
5039 #else
5040   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5041 #endif
5042   CharData storage;
5043 
5044   CharData_Init(&storage);
5045   XML_SetUserData(g_parser, &storage);
5046   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5047   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5048       == XML_STATUS_ERROR)
5049     xml_failure(g_parser);
5050   CharData_CheckXMLChars(&storage, expected);
5051 }
5052 END_TEST
5053 
5054 START_TEST(test_entity_in_utf16_le_attr) {
5055   const char text[] =
5056       /* <e a='&#228; &#x00E4;'></e> */
5057       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5058       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5059 #ifdef XML_UNICODE
5060   const XML_Char *expected = XCS("\x00e4 \x00e4");
5061 #else
5062   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5063 #endif
5064   CharData storage;
5065 
5066   CharData_Init(&storage);
5067   XML_SetUserData(g_parser, &storage);
5068   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5069   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5070       == XML_STATUS_ERROR)
5071     xml_failure(g_parser);
5072   CharData_CheckXMLChars(&storage, expected);
5073 }
5074 END_TEST
5075 
5076 START_TEST(test_entity_public_utf16_be) {
5077   const char text[] =
5078       /* <!DOCTYPE d [ */
5079       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5080       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5081       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5082       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5083       /* %e; */
5084       "\0%\0e\0;\0\n"
5085       /* ]> */
5086       "\0]\0>\0\n"
5087       /* <d>&j;</d> */
5088       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5089   ExtTest2 test_data
5090       = {/* <!ENTITY j 'baz'> */
5091          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5092   const XML_Char *expected = XCS("baz");
5093   CharData storage;
5094 
5095   CharData_Init(&storage);
5096   test_data.storage = &storage;
5097   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5098   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5099   XML_SetUserData(g_parser, &test_data);
5100   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5101   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5102       == XML_STATUS_ERROR)
5103     xml_failure(g_parser);
5104   CharData_CheckXMLChars(&storage, expected);
5105 }
5106 END_TEST
5107 
5108 START_TEST(test_entity_public_utf16_le) {
5109   const char text[] =
5110       /* <!DOCTYPE d [ */
5111       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5112       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5113       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5114       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5115       /* %e; */
5116       "%\0e\0;\0\n\0"
5117       /* ]> */
5118       "]\0>\0\n\0"
5119       /* <d>&j;</d> */
5120       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5121   ExtTest2 test_data
5122       = {/* <!ENTITY j 'baz'> */
5123          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5124   const XML_Char *expected = XCS("baz");
5125   CharData storage;
5126 
5127   CharData_Init(&storage);
5128   test_data.storage = &storage;
5129   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5130   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5131   XML_SetUserData(g_parser, &test_data);
5132   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5133   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5134       == XML_STATUS_ERROR)
5135     xml_failure(g_parser);
5136   CharData_CheckXMLChars(&storage, expected);
5137 }
5138 END_TEST
5139 
5140 /* Test that a doctype with neither an internal nor external subset is
5141  * faulted
5142  */
5143 START_TEST(test_short_doctype) {
5144   const char *text = "<!DOCTYPE doc></doc>";
5145   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5146                  "DOCTYPE without subset not rejected");
5147 }
5148 END_TEST
5149 
5150 START_TEST(test_short_doctype_2) {
5151   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5152   expect_failure(text, XML_ERROR_SYNTAX,
5153                  "DOCTYPE without Public ID not rejected");
5154 }
5155 END_TEST
5156 
5157 START_TEST(test_short_doctype_3) {
5158   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5159   expect_failure(text, XML_ERROR_SYNTAX,
5160                  "DOCTYPE without System ID not rejected");
5161 }
5162 END_TEST
5163 
5164 START_TEST(test_long_doctype) {
5165   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5166   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5167 }
5168 END_TEST
5169 
5170 START_TEST(test_bad_entity) {
5171   const char *text = "<!DOCTYPE doc [\n"
5172                      "  <!ENTITY foo PUBLIC>\n"
5173                      "]>\n"
5174                      "<doc/>";
5175   expect_failure(text, XML_ERROR_SYNTAX,
5176                  "ENTITY without Public ID is not rejected");
5177 }
5178 END_TEST
5179 
5180 /* Test unquoted value is faulted */
5181 START_TEST(test_bad_entity_2) {
5182   const char *text = "<!DOCTYPE doc [\n"
5183                      "  <!ENTITY % foo bar>\n"
5184                      "]>\n"
5185                      "<doc/>";
5186   expect_failure(text, XML_ERROR_SYNTAX,
5187                  "ENTITY without Public ID is not rejected");
5188 }
5189 END_TEST
5190 
5191 START_TEST(test_bad_entity_3) {
5192   const char *text = "<!DOCTYPE doc [\n"
5193                      "  <!ENTITY % foo PUBLIC>\n"
5194                      "]>\n"
5195                      "<doc/>";
5196   expect_failure(text, XML_ERROR_SYNTAX,
5197                  "Parameter ENTITY without Public ID is not rejected");
5198 }
5199 END_TEST
5200 
5201 START_TEST(test_bad_entity_4) {
5202   const char *text = "<!DOCTYPE doc [\n"
5203                      "  <!ENTITY % foo SYSTEM>\n"
5204                      "]>\n"
5205                      "<doc/>";
5206   expect_failure(text, XML_ERROR_SYNTAX,
5207                  "Parameter ENTITY without Public ID is not rejected");
5208 }
5209 END_TEST
5210 
5211 START_TEST(test_bad_notation) {
5212   const char *text = "<!DOCTYPE doc [\n"
5213                      "  <!NOTATION n SYSTEM>\n"
5214                      "]>\n"
5215                      "<doc/>";
5216   expect_failure(text, XML_ERROR_SYNTAX,
5217                  "Notation without System ID is not rejected");
5218 }
5219 END_TEST
5220 
5221 /* Test for issue #11, wrongly suppressed default handler */
5222 START_TEST(test_default_doctype_handler) {
5223   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5224                      "  <!ENTITY foo 'bar'>\n"
5225                      "]>\n"
5226                      "<doc>&foo;</doc>";
5227   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5228                               {XCS("'test.dtd'"), 10, XML_FALSE},
5229                               {NULL, 0, XML_FALSE}};
5230   int i;
5231 
5232   XML_SetUserData(g_parser, &test_data);
5233   XML_SetDefaultHandler(g_parser, checking_default_handler);
5234   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5235   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5236       == XML_STATUS_ERROR)
5237     xml_failure(g_parser);
5238   for (i = 0; test_data[i].expected != NULL; i++)
5239     if (! test_data[i].seen)
5240       fail("Default handler not run for public !DOCTYPE");
5241 }
5242 END_TEST
5243 
5244 START_TEST(test_empty_element_abort) {
5245   const char *text = "<abort/>";
5246 
5247   XML_SetStartElementHandler(g_parser, start_element_suspender);
5248   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5249       != XML_STATUS_ERROR)
5250     fail("Expected to error on abort");
5251 }
5252 END_TEST
5253 
5254 /* Regression test for GH issue #612: unfinished m_declAttributeType
5255  * allocation in ->m_tempPool can corrupt following allocation.
5256  */
5257 START_TEST(test_pool_integrity_with_unfinished_attr) {
5258   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5259                      "<!DOCTYPE foo [\n"
5260                      "<!ELEMENT foo ANY>\n"
5261                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5262                      "%entp;\n"
5263                      "]>\n"
5264                      "<a></a>\n";
5265   const XML_Char *expected = XCS("COMMENT");
5266   CharData storage;
5267 
5268   CharData_Init(&storage);
5269   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5270   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5271   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5272   XML_SetCommentHandler(g_parser, accumulate_comment);
5273   XML_SetUserData(g_parser, &storage);
5274   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5275       == XML_STATUS_ERROR)
5276     xml_failure(g_parser);
5277   CharData_CheckXMLChars(&storage, expected);
5278 }
5279 END_TEST
5280 
5281 START_TEST(test_nested_entity_suspend) {
5282   const char *const text = "<!DOCTYPE a [\n"
5283                            "  <!ENTITY e1 '<!--e1-->'>\n"
5284                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5285                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5286                            "]>\n"
5287                            "<a><!--start-->&e3;<!--end--></a>";
5288   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5289       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5290   CharData storage;
5291   CharData_Init(&storage);
5292   XML_Parser parser = XML_ParserCreate(NULL);
5293   ParserPlusStorage parserPlusStorage = {parser, &storage};
5294 
5295   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5296   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5297   XML_SetUserData(parser, &parserPlusStorage);
5298 
5299   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5300   while (status == XML_STATUS_SUSPENDED) {
5301     status = XML_ResumeParser(parser);
5302   }
5303   if (status != XML_STATUS_OK)
5304     xml_failure(parser);
5305 
5306   CharData_CheckXMLChars(&storage, expected);
5307   XML_ParserFree(parser);
5308 }
5309 END_TEST
5310 
5311 /* Regression test for quadratic parsing on large tokens */
5312 START_TEST(test_big_tokens_scale_linearly) {
5313   const struct {
5314     const char *pre;
5315     const char *post;
5316   } text[] = {
5317       {"<a>", "</a>"},                      // assumed good, used as baseline
5318       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5319       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5320       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5321       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5322   };
5323   const int num_cases = sizeof(text) / sizeof(text[0]);
5324   char aaaaaa[4096];
5325   const int fillsize = (int)sizeof(aaaaaa);
5326   const int fillcount = 100;
5327   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5328   const unsigned max_factor = 4;
5329   const unsigned max_scanned = max_factor * approx_bytes;
5330 
5331   memset(aaaaaa, 'a', fillsize);
5332 
5333   if (! g_reparseDeferralEnabledDefault) {
5334     return; // heuristic is disabled; we would get O(n^2) and fail.
5335   }
5336 
5337   for (int i = 0; i < num_cases; ++i) {
5338     XML_Parser parser = XML_ParserCreate(NULL);
5339     assert_true(parser != NULL);
5340     enum XML_Status status;
5341     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5342 
5343     // parse the start text
5344     g_bytesScanned = 0;
5345     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5346                                      (int)strlen(text[i].pre), XML_FALSE);
5347     if (status != XML_STATUS_OK) {
5348       xml_failure(parser);
5349     }
5350 
5351     // parse lots of 'a', failing the test early if it takes too long
5352     unsigned past_max_count = 0;
5353     for (int f = 0; f < fillcount; ++f) {
5354       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5355       if (status != XML_STATUS_OK) {
5356         xml_failure(parser);
5357       }
5358       if (g_bytesScanned > max_scanned) {
5359         // We're not done, and have already passed the limit -- the test will
5360         // definitely fail. This block allows us to save time by failing early.
5361         const unsigned pushed
5362             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5363         fprintf(
5364             stderr,
5365             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5366             f + 1, fillcount, pushed, g_bytesScanned,
5367             g_bytesScanned / (double)pushed, max_scanned, max_factor);
5368         past_max_count++;
5369         // We are failing, but allow a few log prints first. If we don't reach
5370         // a count of five, the test will fail after the loop instead.
5371         assert_true(past_max_count < 5);
5372       }
5373     }
5374 
5375     // parse the end text
5376     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5377                                      (int)strlen(text[i].post), XML_TRUE);
5378     if (status != XML_STATUS_OK) {
5379       xml_failure(parser);
5380     }
5381 
5382     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5383     if (g_bytesScanned > max_scanned) {
5384       fprintf(
5385           stderr,
5386           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5387           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5388           max_factor);
5389       fail("scanned too many bytes");
5390     }
5391 
5392     XML_ParserFree(parser);
5393   }
5394 }
5395 END_TEST
5396 
5397 START_TEST(test_set_reparse_deferral) {
5398   const char *const pre = "<d>";
5399   const char *const start = "<x attr='";
5400   const char *const end = "'></x>";
5401   char eeeeee[100];
5402   const int fillsize = (int)sizeof(eeeeee);
5403   memset(eeeeee, 'e', fillsize);
5404 
5405   for (int enabled = 0; enabled <= 1; enabled += 1) {
5406     set_subtest("deferral=%d", enabled);
5407 
5408     XML_Parser parser = XML_ParserCreate(NULL);
5409     assert_true(parser != NULL);
5410     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5411     // pre-grow the buffer to avoid reparsing due to almost-fullness
5412     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5413 
5414     CharData storage;
5415     CharData_Init(&storage);
5416     XML_SetUserData(parser, &storage);
5417     XML_SetStartElementHandler(parser, start_element_event_handler);
5418 
5419     enum XML_Status status;
5420     // parse the start text
5421     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5422     if (status != XML_STATUS_OK) {
5423       xml_failure(parser);
5424     }
5425     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5426 
5427     // ..and the start of the token
5428     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5429     if (status != XML_STATUS_OK) {
5430       xml_failure(parser);
5431     }
5432     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5433 
5434     // try to parse lots of 'e', but the token isn't finished
5435     for (int c = 0; c < 100; ++c) {
5436       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5437       if (status != XML_STATUS_OK) {
5438         xml_failure(parser);
5439       }
5440     }
5441     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5442 
5443     // end the <x> token.
5444     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5445     if (status != XML_STATUS_OK) {
5446       xml_failure(parser);
5447     }
5448 
5449     if (enabled) {
5450       // In general, we may need to push more data to trigger a reparse attempt,
5451       // but in this test, the data is constructed to always require it.
5452       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5453       // 2x the token length should suffice; the +1 covers the start and end.
5454       for (int c = 0; c < 101; ++c) {
5455         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5456         if (status != XML_STATUS_OK) {
5457           xml_failure(parser);
5458         }
5459       }
5460     }
5461     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5462 
5463     XML_ParserFree(parser);
5464   }
5465 }
5466 END_TEST
5467 
5468 struct element_decl_data {
5469   XML_Parser parser;
5470   int count;
5471 };
5472 
5473 static void
5474 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5475   UNUSED_P(name);
5476   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5477   testdata->count += 1;
5478   XML_FreeContentModel(testdata->parser, model);
5479 }
5480 
5481 static int
5482 external_inherited_parser(XML_Parser p, const XML_Char *context,
5483                           const XML_Char *base, const XML_Char *systemId,
5484                           const XML_Char *publicId) {
5485   UNUSED_P(base);
5486   UNUSED_P(systemId);
5487   UNUSED_P(publicId);
5488   const char *const pre = "<!ELEMENT document ANY>\n";
5489   const char *const start = "<!ELEMENT ";
5490   const char *const end = " ANY>\n";
5491   const char *const post = "<!ELEMENT xyz ANY>\n";
5492   const int enabled = *(int *)XML_GetUserData(p);
5493   char eeeeee[100];
5494   char spaces[100];
5495   const int fillsize = (int)sizeof(eeeeee);
5496   assert_true(fillsize == (int)sizeof(spaces));
5497   memset(eeeeee, 'e', fillsize);
5498   memset(spaces, ' ', fillsize);
5499 
5500   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5501   assert_true(parser != NULL);
5502   // pre-grow the buffer to avoid reparsing due to almost-fullness
5503   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5504 
5505   struct element_decl_data testdata;
5506   testdata.parser = parser;
5507   testdata.count = 0;
5508   XML_SetUserData(parser, &testdata);
5509   XML_SetElementDeclHandler(parser, element_decl_counter);
5510 
5511   enum XML_Status status;
5512   // parse the initial text
5513   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5514   if (status != XML_STATUS_OK) {
5515     xml_failure(parser);
5516   }
5517   assert_true(testdata.count == 1); // first element should be done
5518 
5519   // ..and the start of the big token
5520   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5521   if (status != XML_STATUS_OK) {
5522     xml_failure(parser);
5523   }
5524   assert_true(testdata.count == 1); // still just the first one
5525 
5526   // try to parse lots of 'e', but the token isn't finished
5527   for (int c = 0; c < 100; ++c) {
5528     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5529     if (status != XML_STATUS_OK) {
5530       xml_failure(parser);
5531     }
5532   }
5533   assert_true(testdata.count == 1); // *still* just the first one
5534 
5535   // end the big token.
5536   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5537   if (status != XML_STATUS_OK) {
5538     xml_failure(parser);
5539   }
5540 
5541   if (enabled) {
5542     // In general, we may need to push more data to trigger a reparse attempt,
5543     // but in this test, the data is constructed to always require it.
5544     assert_true(testdata.count == 1); // or the test is incorrect
5545     // 2x the token length should suffice; the +1 covers the start and end.
5546     for (int c = 0; c < 101; ++c) {
5547       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5548       if (status != XML_STATUS_OK) {
5549         xml_failure(parser);
5550       }
5551     }
5552   }
5553   assert_true(testdata.count == 2); // the big token should be done
5554 
5555   // parse the final text
5556   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5557   if (status != XML_STATUS_OK) {
5558     xml_failure(parser);
5559   }
5560   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5561 
5562   XML_ParserFree(parser);
5563   return XML_STATUS_OK;
5564 }
5565 
5566 START_TEST(test_reparse_deferral_is_inherited) {
5567   const char *const text
5568       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5569   for (int enabled = 0; enabled <= 1; ++enabled) {
5570     set_subtest("deferral=%d", enabled);
5571 
5572     XML_Parser parser = XML_ParserCreate(NULL);
5573     assert_true(parser != NULL);
5574     XML_SetUserData(parser, (void *)&enabled);
5575     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5576     // this handler creates a sub-parser and checks that its deferral behavior
5577     // is what we expected, based on the value of `enabled` (in userdata).
5578     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5579     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5580     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5581       xml_failure(parser);
5582 
5583     XML_ParserFree(parser);
5584   }
5585 }
5586 END_TEST
5587 
5588 START_TEST(test_set_reparse_deferral_on_null_parser) {
5589   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5590   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5591   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5592   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5593   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5594               == XML_FALSE);
5595   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5596               == XML_FALSE);
5597 }
5598 END_TEST
5599 
5600 START_TEST(test_set_reparse_deferral_on_the_fly) {
5601   const char *const pre = "<d><x attr='";
5602   const char *const end = "'></x>";
5603   char iiiiii[100];
5604   const int fillsize = (int)sizeof(iiiiii);
5605   memset(iiiiii, 'i', fillsize);
5606 
5607   XML_Parser parser = XML_ParserCreate(NULL);
5608   assert_true(parser != NULL);
5609   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5610 
5611   CharData storage;
5612   CharData_Init(&storage);
5613   XML_SetUserData(parser, &storage);
5614   XML_SetStartElementHandler(parser, start_element_event_handler);
5615 
5616   enum XML_Status status;
5617   // parse the start text
5618   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5619   if (status != XML_STATUS_OK) {
5620     xml_failure(parser);
5621   }
5622   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5623 
5624   // try to parse some 'i', but the token isn't finished
5625   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5626   if (status != XML_STATUS_OK) {
5627     xml_failure(parser);
5628   }
5629   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5630 
5631   // end the <x> token.
5632   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5633   if (status != XML_STATUS_OK) {
5634     xml_failure(parser);
5635   }
5636   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5637 
5638   // now change the heuristic setting and add *no* data
5639   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5640   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5641   status = XML_Parse(parser, "", 0, XML_FALSE);
5642   if (status != XML_STATUS_OK) {
5643     xml_failure(parser);
5644   }
5645   CharData_CheckXMLChars(&storage, XCS("dx"));
5646 
5647   XML_ParserFree(parser);
5648 }
5649 END_TEST
5650 
5651 START_TEST(test_set_bad_reparse_option) {
5652   XML_Parser parser = XML_ParserCreate(NULL);
5653   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5654   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5655   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5656   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5657   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5658   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5659   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5660   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5661   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5662   XML_ParserFree(parser);
5663 }
5664 END_TEST
5665 
/* Sum of all sizes requested through counting_realloc(). */
static size_t g_totalAlloc = 0;
/* Largest single size requested through counting_realloc(). */
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in g_totalAlloc and
 * g_biggestAlloc before forwarding the call.  The counters are updated
 * whether or not the underlying realloc() succeeds.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}
5677 
/* malloc()-shaped entry point that delegates to counting_realloc() with a
 * NULL pointer, so the request goes through the same accounting.
 */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
5682 
5683 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5684   if (g_chunkSize != 0) {
5685     // this test does not use SINGLE_BYTES, because it depends on very precise
5686     // buffer fills.
5687     return;
5688   }
5689   if (! g_reparseDeferralEnabledDefault) {
5690     return; // this test is irrelevant when the deferral heuristic is disabled.
5691   }
5692 
5693   const int document_length = 65536;
5694   char *const document = (char *)malloc(document_length);
5695 
5696   const XML_Memory_Handling_Suite memfuncs = {
5697       counting_malloc,
5698       counting_realloc,
5699       free,
5700   };
5701 
5702   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5703   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5704   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5705 
5706   for (const int *leading = leading_list; *leading >= 0; leading++) {
5707     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5708       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5709         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5710                     *fillsize);
5711         // start by checking that the test looks reasonably valid
5712         assert_true(*leading + *bigtoken <= document_length);
5713 
5714         // put 'x' everywhere; some will be overwritten by elements.
5715         memset(document, 'x', document_length);
5716         // maybe add an initial tag
5717         if (*leading) {
5718           assert_true(*leading >= 3); // or the test case is invalid
5719           memcpy(document, "<a>", 3);
5720         }
5721         // add the large token
5722         document[*leading + 0] = '<';
5723         document[*leading + 1] = 'b';
5724         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5725         document[*leading + *bigtoken - 1] = '>';
5726 
5727         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5728         const int expected_elem_total = 1 + (*leading ? 1 : 0);
5729 
5730         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5731         assert_true(parser != NULL);
5732 
5733         CharData storage;
5734         CharData_Init(&storage);
5735         XML_SetUserData(parser, &storage);
5736         XML_SetStartElementHandler(parser, start_element_event_handler);
5737 
5738         g_biggestAlloc = 0;
5739         g_totalAlloc = 0;
5740         int offset = 0;
5741         // fill data until the big token is covered (but not necessarily parsed)
5742         while (offset < *leading + *bigtoken) {
5743           assert_true(offset + *fillsize <= document_length);
5744           const enum XML_Status status
5745               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5746           if (status != XML_STATUS_OK) {
5747             xml_failure(parser);
5748           }
5749           offset += *fillsize;
5750         }
5751         // Now, check that we've had a buffer allocation that could fit the
5752         // context bytes and our big token. In order to detect a special case,
5753         // we need to know how many bytes of our big token were included in the
5754         // first push that contained _any_ bytes of the big token:
5755         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5756         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5757           // Special case: we aren't saving any context, and the whole big token
5758           // was covered by a single fill, so Expat may have parsed directly
5759           // from our input pointer, without allocating an internal buffer.
5760         } else if (*leading < XML_CONTEXT_BYTES) {
5761           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5762         } else {
5763           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5764         }
5765         // fill data until the big token is actually parsed
5766         while (storage.count < expected_elem_total) {
5767           const size_t alloc_before = g_totalAlloc;
5768           assert_true(offset + *fillsize <= document_length);
5769           const enum XML_Status status
5770               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5771           if (status != XML_STATUS_OK) {
5772             xml_failure(parser);
5773           }
5774           offset += *fillsize;
5775           // since all the bytes of the big token are already in the buffer,
5776           // the bufsize ceiling should make us finish its parsing without any
5777           // further buffer allocations. We assume that there will be no other
5778           // large allocations in this test.
5779           assert_true(g_totalAlloc - alloc_before < 4096);
5780         }
5781         // test-the-test: was our alloc even called?
5782         assert_true(g_totalAlloc > 0);
5783         // test-the-test: there shouldn't be any extra start elements
5784         assert_true(storage.count == expected_elem_total);
5785 
5786         XML_ParserFree(parser);
5787       }
5788     }
5789   }
5790   free(document);
5791 }
5792 END_TEST
5793 
5794 START_TEST(test_varying_buffer_fills) {
5795   const int KiB = 1024;
5796   const int MiB = 1024 * KiB;
5797   const int document_length = 16 * MiB;
5798   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5799 
5800   if (g_chunkSize != 0) {
5801     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5802   }
5803 
5804   char *const document = (char *)malloc(document_length);
5805   assert_true(document != NULL);
5806   memset(document, 'x', document_length);
5807   document[0] = '<';
5808   document[1] = 't';
5809   memset(&document[2], ' ', big - 2); // a very spacy token
5810   document[big - 1] = '>';
5811 
5812   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5813   // When reparse deferral is enabled, the final (negated) value is the expected
5814   // maximum number of bytes scanned in parse attempts.
5815   const int testcases[][30] = {
5816       {8 * MiB, -8 * MiB},
5817       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5818       // zero-size fills shouldn't trigger the bypass
5819       {4 * MiB, 0, 4 * MiB, -12 * MiB},
5820       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5821       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5822       // try to hit the buffer ceiling only once (at the end)
5823       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5824       // try to hit the same buffer ceiling multiple times
5825       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5826 
5827       // try to hit every ceiling, by always landing 1K shy of the buffer size
5828       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5829        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5830 
5831       // try to avoid every ceiling, by always landing 1B past the buffer size
5832       // the normal 2x heuristic threshold still forces parse attempts.
5833       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5834        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5835        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5836        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5837        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5838        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5839        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5840        -(10 * MiB + 682 * KiB + 7)},
5841       // try to avoid every ceiling again, except on our last fill.
5842       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5843        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5844        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5845        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5846        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5847        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5848        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5849        -(10 * MiB + 682 * KiB + 6)},
5850 
5851       // try to hit ceilings on the way multiple times
5852       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5853        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5854        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
5855        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
5856        // we'll make a parse attempt at every parse call
5857        -(45 * MiB + 12)},
5858   };
5859   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5860   for (int test_i = 0; test_i < testcount; test_i++) {
5861     const int *fillsize = testcases[test_i];
5862     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5863                 fillsize[2], fillsize[3]);
5864     XML_Parser parser = XML_ParserCreate(NULL);
5865     assert_true(parser != NULL);
5866 
5867     CharData storage;
5868     CharData_Init(&storage);
5869     XML_SetUserData(parser, &storage);
5870     XML_SetStartElementHandler(parser, start_element_event_handler);
5871 
5872     g_bytesScanned = 0;
5873     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5874     int offset = 0;
5875     while (*fillsize >= 0) {
5876       assert_true(offset + *fillsize <= document_length); // or test is invalid
5877       const enum XML_Status status
5878           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5879       if (status != XML_STATUS_OK) {
5880         xml_failure(parser);
5881       }
5882       offset += *fillsize;
5883       fillsize++;
5884       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5885       worstcase_bytes += offset; // we might've tried to parse all pending bytes
5886     }
5887     assert_true(storage.count == 1); // the big token should've been parsed
5888     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
5889     if (g_reparseDeferralEnabledDefault) {
5890       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5891       const unsigned max_bytes_scanned = -*fillsize;
5892       if (g_bytesScanned > max_bytes_scanned) {
5893         fprintf(stderr,
5894                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
5895                 g_bytesScanned, max_bytes_scanned);
5896         fail("too many bytes scanned in parse attempts");
5897       }
5898     }
5899     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
5900 
5901     XML_ParserFree(parser);
5902   }
5903   free(document);
5904 }
5905 END_TEST
5906 
5907 void
5908 make_basic_test_case(Suite *s) {
5909   TCase *tc_basic = tcase_create("basic tests");
5910 
5911   suite_add_tcase(s, tc_basic);
5912   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
5913 
5914   tcase_add_test(tc_basic, test_nul_byte);
5915   tcase_add_test(tc_basic, test_u0000_char);
5916   tcase_add_test(tc_basic, test_siphash_self);
5917   tcase_add_test(tc_basic, test_siphash_spec);
5918   tcase_add_test(tc_basic, test_bom_utf8);
5919   tcase_add_test(tc_basic, test_bom_utf16_be);
5920   tcase_add_test(tc_basic, test_bom_utf16_le);
5921   tcase_add_test(tc_basic, test_nobom_utf16_le);
5922   tcase_add_test(tc_basic, test_hash_collision);
5923   tcase_add_test(tc_basic, test_illegal_utf8);
5924   tcase_add_test(tc_basic, test_utf8_auto_align);
5925   tcase_add_test(tc_basic, test_utf16);
5926   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
5927   tcase_add_test(tc_basic, test_not_utf16);
5928   tcase_add_test(tc_basic, test_bad_encoding);
5929   tcase_add_test(tc_basic, test_latin1_umlauts);
5930   tcase_add_test(tc_basic, test_long_utf8_character);
5931   tcase_add_test(tc_basic, test_long_latin1_attribute);
5932   tcase_add_test(tc_basic, test_long_ascii_attribute);
5933   /* Regression test for SF bug #491986. */
5934   tcase_add_test(tc_basic, test_danish_latin1);
5935   /* Regression test for SF bug #514281. */
5936   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
5937   tcase_add_test(tc_basic, test_french_charref_decimal);
5938   tcase_add_test(tc_basic, test_french_latin1);
5939   tcase_add_test(tc_basic, test_french_utf8);
5940   tcase_add_test(tc_basic, test_utf8_false_rejection);
5941   tcase_add_test(tc_basic, test_line_number_after_parse);
5942   tcase_add_test(tc_basic, test_column_number_after_parse);
5943   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
5944   tcase_add_test(tc_basic, test_line_number_after_error);
5945   tcase_add_test(tc_basic, test_column_number_after_error);
5946   tcase_add_test(tc_basic, test_really_long_lines);
5947   tcase_add_test(tc_basic, test_really_long_encoded_lines);
5948   tcase_add_test(tc_basic, test_end_element_events);
5949   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
5950   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
5951   tcase_add_test(tc_basic, test_xmldecl_misplaced);
5952   tcase_add_test(tc_basic, test_xmldecl_invalid);
5953   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
5954   tcase_add_test(tc_basic, test_xmldecl_missing_value);
5955   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
5956   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
5957   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
5958   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
5959   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
5960   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
5961   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
5962   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
5963   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
5964   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
5965   tcase_add_test(tc_basic,
5966                  test_wfc_undeclared_entity_with_external_subset_standalone);
5967   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
5968   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
5969   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
5970   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
5971   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
5972   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
5973   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
5974   tcase_add_test(tc_basic, test_dtd_attr_handling);
5975   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
5976   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
5977   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
5978   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
5979   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
5980   tcase_add_test(tc_basic, test_good_cdata_ascii);
5981   tcase_add_test(tc_basic, test_good_cdata_utf16);
5982   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
5983   tcase_add_test(tc_basic, test_long_cdata_utf16);
5984   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
5985   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
5986   tcase_add_test(tc_basic, test_bad_cdata);
5987   tcase_add_test(tc_basic, test_bad_cdata_utf16);
5988   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
5989   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
5990   tcase_add_test(tc_basic, test_memory_allocation);
5991   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
5992   tcase_add_test(tc_basic, test_dtd_elements);
5993   tcase_add_test(tc_basic, test_dtd_elements_nesting);
5994   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
5995   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
5996   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
5997   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
5998   tcase_add_test__ifdef_xml_dtd(tc_basic,
5999                                 test_foreign_dtd_without_external_subset);
6000   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6001   tcase_add_test(tc_basic, test_set_base);
6002   tcase_add_test(tc_basic, test_attributes);
6003   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6004   tcase_add_test(tc_basic, test_resume_invalid_parse);
6005   tcase_add_test(tc_basic, test_resume_resuspended);
6006   tcase_add_test(tc_basic, test_cdata_default);
6007   tcase_add_test(tc_basic, test_subordinate_reset);
6008   tcase_add_test(tc_basic, test_subordinate_suspend);
6009   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6010   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6011   tcase_add_test__ifdef_xml_dtd(tc_basic,
6012                                 test_ext_entity_invalid_suspended_parse);
6013   tcase_add_test(tc_basic, test_explicit_encoding);
6014   tcase_add_test(tc_basic, test_trailing_cr);
6015   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6016   tcase_add_test(tc_basic, test_trailing_rsqb);
6017   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6018   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6019   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6020   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6021   tcase_add_test(tc_basic, test_empty_parse);
6022   tcase_add_test(tc_basic, test_negative_len_parse);
6023   tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6024   tcase_add_test(tc_basic, test_get_buffer_1);
6025   tcase_add_test(tc_basic, test_get_buffer_2);
6026 #if XML_CONTEXT_BYTES > 0
6027   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6028 #endif
6029   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6030   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6031   tcase_add_test(tc_basic, test_byte_info_at_end);
6032   tcase_add_test(tc_basic, test_byte_info_at_error);
6033   tcase_add_test(tc_basic, test_byte_info_at_cdata);
6034   tcase_add_test(tc_basic, test_predefined_entities);
6035   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6036   tcase_add_test(tc_basic, test_not_predefined_entities);
6037   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6038   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6039   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6040   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6041   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6042   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6043   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6044   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6045   tcase_add_test(tc_basic, test_bad_public_doctype);
6046   tcase_add_test(tc_basic, test_attribute_enum_value);
6047   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6048   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6049   tcase_add_test(tc_basic, test_public_notation_no_sysid);
6050   tcase_add_test(tc_basic, test_nested_groups);
6051   tcase_add_test(tc_basic, test_group_choice);
6052   tcase_add_test(tc_basic, test_standalone_parameter_entity);
6053   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6054   tcase_add_test__ifdef_xml_dtd(tc_basic,
6055                                 test_recursive_external_parameter_entity);
6056   tcase_add_test__ifdef_xml_dtd(tc_basic,
6057                                 test_recursive_external_parameter_entity_2);
6058   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6059   tcase_add_test(tc_basic, test_suspend_xdecl);
6060   tcase_add_test(tc_basic, test_abort_epilog);
6061   tcase_add_test(tc_basic, test_abort_epilog_2);
6062   tcase_add_test(tc_basic, test_suspend_epilog);
6063   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6064   tcase_add_test(tc_basic, test_unfinished_epilog);
6065   tcase_add_test(tc_basic, test_partial_char_in_epilog);
6066   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6067   tcase_add_test__ifdef_xml_dtd(tc_basic,
6068                                 test_suspend_resume_internal_entity_issue_629);
6069   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6070   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6071   tcase_add_test(tc_basic, test_restart_on_error);
6072   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6073   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6074   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6075   tcase_add_test(tc_basic, test_standalone_internal_entity);
6076   tcase_add_test(tc_basic, test_skipped_external_entity);
6077   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6078   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6079   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6080   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6081   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6082   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6083   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6084   tcase_add_test(tc_basic, test_pi_handled_in_default);
6085   tcase_add_test(tc_basic, test_comment_handled_in_default);
6086   tcase_add_test(tc_basic, test_pi_yml);
6087   tcase_add_test(tc_basic, test_pi_xnl);
6088   tcase_add_test(tc_basic, test_pi_xmm);
6089   tcase_add_test(tc_basic, test_utf16_pi);
6090   tcase_add_test(tc_basic, test_utf16_be_pi);
6091   tcase_add_test(tc_basic, test_utf16_be_comment);
6092   tcase_add_test(tc_basic, test_utf16_le_comment);
6093   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6094   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6095   tcase_add_test(tc_basic, test_unknown_encoding_success);
6096   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6097   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6098   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6099   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6100   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6101   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6102   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6103   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6104   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6105   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6106   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6107   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6108   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6109   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6110   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6111   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6112   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6113   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6114   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6115   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6116   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6117   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6118   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6119   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6120   tcase_add_test(tc_basic, test_utf16_attribute);
6121   tcase_add_test(tc_basic, test_utf16_second_attr);
6122   tcase_add_test(tc_basic, test_attr_after_solidus);
6123   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6124   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6125   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6126   tcase_add_test(tc_basic, test_bad_doctype);
6127   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6128   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6129   tcase_add_test(tc_basic, test_bad_doctype_plus);
6130   tcase_add_test(tc_basic, test_bad_doctype_star);
6131   tcase_add_test(tc_basic, test_bad_doctype_query);
6132   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6133   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6134   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6135   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6136   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6137   tcase_add_test(tc_basic, test_short_doctype);
6138   tcase_add_test(tc_basic, test_short_doctype_2);
6139   tcase_add_test(tc_basic, test_short_doctype_3);
6140   tcase_add_test(tc_basic, test_long_doctype);
6141   tcase_add_test(tc_basic, test_bad_entity);
6142   tcase_add_test(tc_basic, test_bad_entity_2);
6143   tcase_add_test(tc_basic, test_bad_entity_3);
6144   tcase_add_test(tc_basic, test_bad_entity_4);
6145   tcase_add_test(tc_basic, test_bad_notation);
6146   tcase_add_test(tc_basic, test_default_doctype_handler);
6147   tcase_add_test(tc_basic, test_empty_element_abort);
6148   tcase_add_test__ifdef_xml_dtd(tc_basic,
6149                                 test_pool_integrity_with_unfinished_attr);
6150   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6151   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6152   tcase_add_test(tc_basic, test_set_reparse_deferral);
6153   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6154   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6155   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6156   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6157   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6158   tcase_add_test(tc_basic, test_varying_buffer_fills);
6159 }
6160