xref: /freebsd/contrib/expat/tests/basic_tests.c (revision c82aeee8a6d39371006f5eeb1b51704e7b97e2b7)
1 /* Tests in the "basic" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22    Copyright (c) 2024-2026 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23    Copyright (c) 2026      Francesco Bertolaccini
24    Copyright (c) 2026      Matthew Fernandez <matthew.fernandez@gmail.com>
25    Licensed under the MIT license:
26 
27    Permission is  hereby granted,  free of charge,  to any  person obtaining
28    a  copy  of  this  software   and  associated  documentation  files  (the
29    "Software"),  to  deal in  the  Software  without restriction,  including
30    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
31    distribute, sublicense, and/or sell copies of the Software, and to permit
32    persons  to whom  the Software  is  furnished to  do so,  subject to  the
33    following conditions:
34 
35    The above copyright  notice and this permission notice  shall be included
36    in all copies or substantial portions of the Software.
37 
38    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
39    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
40    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
41    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
42    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
43    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
44    USE OR OTHER DEALINGS IN THE SOFTWARE.
45 */
46 
47 #if defined(NDEBUG)
48 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
49 #endif
50 
51 #include "expat_config.h"
52 
53 #include <assert.h>
54 
55 #include <stdbool.h>
56 #include <stdio.h>
57 #include <string.h>
58 #include <time.h>
59 
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69 
70 static void
71 basic_setup(void) {
72   g_parser = XML_ParserCreate(NULL);
73   if (g_parser == NULL)
74     fail("Parser not created.");
75 }
76 
77 /*
78  * Character & encoding tests.
79  */
80 
81 START_TEST(test_nul_byte) {
82   char text[] = "<doc>\0</doc>";
83 
84   /* test that a NUL byte (in US-ASCII data) is an error */
85   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86       == XML_STATUS_OK)
87     fail("Parser did not report error on NUL-byte.");
88   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89     xml_failure(g_parser);
90 }
91 END_TEST
92 
93 START_TEST(test_u0000_char) {
94   /* test that a NUL byte (in US-ASCII data) is an error */
95   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96                  "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99 
100 START_TEST(test_siphash_self) {
101   if (! sip24_valid())
102     fail("SipHash self-test failed");
103 }
104 END_TEST
105 
106 START_TEST(test_siphash_spec) {
107   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109                          "\x0a\x0b\x0c\x0d\x0e";
110   const size_t len = sizeof(message) - 1;
111   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112   struct siphash state;
113   struct sipkey key;
114 
115   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116                   "\x0a\x0b\x0c\x0d\x0e\x0f");
117   sip24_init(&state, &key);
118 
119   /* Cover spread across calls */
120   sip24_update(&state, message, 4);
121   sip24_update(&state, message + 4, len - 4);
122 
123   /* Cover null length */
124   sip24_update(&state, message, 0);
125 
126   if (sip24_final(&state) != expected)
127     fail("sip24_final failed spec test\n");
128 
129   /* Cover wrapper */
130   if (siphash24(message, len, &key) != expected)
131     fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134 
135 START_TEST(test_bom_utf8) {
136   /* This test is really just making sure we don't core on a UTF-8 BOM. */
137   const char *text = "\357\273\277<e/>";
138 
139   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140       == XML_STATUS_ERROR)
141     xml_failure(g_parser);
142 }
143 END_TEST
144 
145 START_TEST(test_bom_utf16_be) {
146   char text[] = "\376\377\0<\0e\0/\0>";
147 
148   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149       == XML_STATUS_ERROR)
150     xml_failure(g_parser);
151 }
152 END_TEST
153 
154 START_TEST(test_bom_utf16_le) {
155   char text[] = "\377\376<\0e\0/\0>\0";
156 
157   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158       == XML_STATUS_ERROR)
159     xml_failure(g_parser);
160 }
161 END_TEST
162 
163 START_TEST(test_nobom_utf16_le) {
164   char text[] = " \0<\0e\0/\0>\0";
165 
166   if (g_chunkSize == 1) {
167     // TODO: with just the first byte, we can't tell the difference between
168     // UTF-16-LE and UTF-8. Avoid the failure for now.
169     return;
170   }
171 
172   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173       == XML_STATUS_ERROR)
174     xml_failure(g_parser);
175 }
176 END_TEST
177 
178 START_TEST(test_hash_collision) {
179   /* For full coverage of the lookup routine, we need to ensure a
180    * hash collision even though we can only tell that we have one
181    * through breakpoint debugging or coverage statistics.  The
182    * following will cause a hash collision on machines with a 64-bit
183    * long type; others will have to experiment.  The full coverage
184    * tests invoked from qa.sh usually provide a hash collision, but
185    * not always.  This is an attempt to provide insurance.
186    */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188   const char *text
189       = "<doc>\n"
190         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195         "<d8>This triggers the table growth and collides with b2</d8>\n"
196         "</doc>\n";
197 
198   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200       == XML_STATUS_ERROR)
201     xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205 
206 START_TEST(test_hash_salt_setter) {
207   const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
208                                '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
209   XML_Parser parser = XML_ParserCreate(NULL);
210 
211   // NULL parser should be rejected
212   assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE);
213 
214   // NULL entropy should be rejected
215   assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE);
216 
217   // Setting should be allowed more than once
218   assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
219   assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
220 
221   // But not after parsing has started
222   assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */)
223               == XML_STATUS_OK);
224   assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE);
225 
226   XML_ParserFree(parser);
227 }
228 END_TEST
229 
230 /* Regression test for SF bug #491986. */
231 START_TEST(test_danish_latin1) {
232   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
233                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
234 #ifdef XML_UNICODE
235   const XML_Char *expected
236       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
237 #else
238   const XML_Char *expected
239       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
240 #endif
241   run_character_check(text, expected);
242 }
243 END_TEST
244 
245 /* Regression test for SF bug #514281. */
246 START_TEST(test_french_charref_hexidecimal) {
247   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
248                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
249 #ifdef XML_UNICODE
250   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
251 #else
252   const XML_Char *expected
253       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
254 #endif
255   run_character_check(text, expected);
256 }
257 END_TEST
258 
259 START_TEST(test_french_charref_decimal) {
260   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
261                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
262 #ifdef XML_UNICODE
263   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
264 #else
265   const XML_Char *expected
266       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
267 #endif
268   run_character_check(text, expected);
269 }
270 END_TEST
271 
272 START_TEST(test_french_latin1) {
273   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
274                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
275 #ifdef XML_UNICODE
276   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
277 #else
278   const XML_Char *expected
279       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
280 #endif
281   run_character_check(text, expected);
282 }
283 END_TEST
284 
285 START_TEST(test_french_utf8) {
286   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
287                      "<doc>\xC3\xA9</doc>";
288 #ifdef XML_UNICODE
289   const XML_Char *expected = XCS("\x00e9");
290 #else
291   const XML_Char *expected = XCS("\xC3\xA9");
292 #endif
293   run_character_check(text, expected);
294 }
295 END_TEST
296 
297 /* Regression test for SF bug #600479.
298    XXX There should be a test that exercises all legal XML Unicode
299    characters as PCDATA and attribute value content, and XML Name
300    characters as part of element and attribute names.
301 */
302 START_TEST(test_utf8_false_rejection) {
303   const char *text = "<doc>\xEF\xBA\xBF</doc>";
304 #ifdef XML_UNICODE
305   const XML_Char *expected = XCS("\xfebf");
306 #else
307   const XML_Char *expected = XCS("\xEF\xBA\xBF");
308 #endif
309   run_character_check(text, expected);
310 }
311 END_TEST
312 
313 /* Regression test for SF bug #477667.
314    This test assures that any 8-bit character followed by a 7-bit
315    character will not be mistakenly interpreted as a valid UTF-8
316    sequence.
317 */
318 START_TEST(test_illegal_utf8) {
319   char text[100];
320   int i;
321 
322   for (i = 128; i <= 255; ++i) {
323     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
324     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
325         == XML_STATUS_OK) {
326       snprintf(text, sizeof(text),
327                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
328                i);
329       fail(text);
330     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
331       xml_failure(g_parser);
332     /* Reset the parser since we use the same parser repeatedly. */
333     XML_ParserReset(g_parser, NULL);
334   }
335 }
336 END_TEST
337 
338 /* Examples, not masks: */
339 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
340 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
341 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
342 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
343 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
344 
345 START_TEST(test_utf8_auto_align) {
346   struct TestCase {
347     ptrdiff_t expectedMovementInChars;
348     const char *input;
349   };
350 
351   struct TestCase cases[] = {
352       {00, ""},
353 
354       {00, UTF8_LEAD_1},
355 
356       {-1, UTF8_LEAD_2},
357       {00, UTF8_LEAD_2 UTF8_FOLLOW},
358 
359       {-1, UTF8_LEAD_3},
360       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
361       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
362 
363       {-1, UTF8_LEAD_4},
364       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
365       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
366       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
367   };
368 
369   size_t i = 0;
370   bool success = true;
371   for (; i < sizeof(cases) / sizeof(*cases); i++) {
372     const char *fromLim = cases[i].input + strlen(cases[i].input);
373     const char *const fromLimInitially = fromLim;
374     ptrdiff_t actualMovementInChars;
375 
376     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
377 
378     actualMovementInChars = (fromLim - fromLimInitially);
379     if (actualMovementInChars != cases[i].expectedMovementInChars) {
380       size_t j = 0;
381       success = false;
382       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
383              ", actually moved by %2d chars: \"",
384              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
385              (int)actualMovementInChars);
386       for (; j < strlen(cases[i].input); j++) {
387         printf("\\x%02x", (unsigned char)cases[i].input[j]);
388       }
389       printf("\"\n");
390     }
391   }
392 
393   if (! success) {
394     fail("UTF-8 auto-alignment is not bullet-proof\n");
395   }
396 }
397 END_TEST
398 
399 START_TEST(test_utf16) {
400   /* <?xml version="1.0" encoding="UTF-16"?>
401    *  <doc a='123'>some {A} text</doc>
402    *
403    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
404    */
405   char text[]
406       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
407         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
408         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
409         "\000'\000?\000>\000\n"
410         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
411         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
412         "<\000/\000d\000o\000c\000>";
413 #ifdef XML_UNICODE
414   const XML_Char *expected = XCS("some \xff21 text");
415 #else
416   const XML_Char *expected = XCS("some \357\274\241 text");
417 #endif
418   CharData storage;
419 
420   CharData_Init(&storage);
421   XML_SetUserData(g_parser, &storage);
422   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
423   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
424       == XML_STATUS_ERROR)
425     xml_failure(g_parser);
426   CharData_CheckXMLChars(&storage, expected);
427 }
428 END_TEST
429 
430 START_TEST(test_utf16_le_epilog_newline) {
431   unsigned int first_chunk_bytes = 17;
432   char text[] = "\xFF\xFE"                  /* BOM */
433                 "<\000e\000/\000>\000"      /* document element */
434                 "\r\000\n\000\r\000\n\000"; /* epilog */
435 
436   if (first_chunk_bytes >= sizeof(text) - 1)
437     fail("bad value of first_chunk_bytes");
438   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE)
439       == XML_STATUS_ERROR)
440     xml_failure(g_parser);
441   else {
442     enum XML_Status rc;
443     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
444                                  (int)(sizeof(text) - first_chunk_bytes - 1),
445                                  XML_TRUE);
446     if (rc == XML_STATUS_ERROR)
447       xml_failure(g_parser);
448   }
449 }
450 END_TEST
451 
452 /* Test that an outright lie in the encoding is faulted */
453 START_TEST(test_not_utf16) {
454   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
455                      "<doc>Hi</doc>";
456 
457   /* Use a handler to provoke the appropriate code paths */
458   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
459   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
460                  "UTF-16 declared in UTF-8 not faulted");
461 }
462 END_TEST
463 
464 /* Test that an unknown encoding is rejected */
465 START_TEST(test_bad_encoding) {
466   const char *text = "<doc>Hi</doc>";
467 
468   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
469     fail("XML_SetEncoding failed");
470   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
471                  "Unknown encoding not faulted");
472 }
473 END_TEST
474 
475 /* Regression test for SF bug #481609, #774028. */
476 START_TEST(test_latin1_umlauts) {
477   const char *text
478       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
479         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
480         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
481 #ifdef XML_UNICODE
482   /* Expected results in UTF-16 */
483   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
484       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
485 #else
486   /* Expected results in UTF-8 */
487   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
488       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
489 #endif
490 
491   run_character_check(text, expected);
492   XML_ParserReset(g_parser, NULL);
493   run_attribute_check(text, expected);
494   /* Repeat with a default handler */
495   XML_ParserReset(g_parser, NULL);
496   XML_SetDefaultHandler(g_parser, dummy_default_handler);
497   run_character_check(text, expected);
498   XML_ParserReset(g_parser, NULL);
499   XML_SetDefaultHandler(g_parser, dummy_default_handler);
500   run_attribute_check(text, expected);
501 }
502 END_TEST
503 
504 /* Test that an element name with a 4-byte UTF-8 character is rejected */
505 START_TEST(test_long_utf8_character) {
506   const char *text
507       = "<?xml version='1.0' encoding='utf-8'?>\n"
508         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
509         "<do\xf0\x90\x80\x80/>";
510   expect_failure(text, XML_ERROR_INVALID_TOKEN,
511                  "4-byte UTF-8 character in element name not faulted");
512 }
513 END_TEST
514 
515 /* Test that a long latin-1 attribute (too long to convert in one go)
516  * is correctly converted
517  */
518 START_TEST(test_long_latin1_attribute) {
519   const char *text
520       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
521         "<doc att='"
522         /* 64 characters per line */
523         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
524         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
525         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
526         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
527         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
528         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
529         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
530         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
531         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
532         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
533         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
534         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
535         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
536         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
537         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
538         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
539         /* Last character splits across a buffer boundary */
540         "\xe4'>\n</doc>";
541 
542   const XML_Char *expected =
543       /* 64 characters per line */
544       /* clang-format off */
545         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
546         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
547         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
548         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
549         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
550         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
551         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
552         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
553         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
554         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
555         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
556         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
557         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
558         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
559         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
560         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
561   /* clang-format on */
562 #ifdef XML_UNICODE
563                                                   XCS("\x00e4");
564 #else
565                                                   XCS("\xc3\xa4");
566 #endif
567 
568   run_attribute_check(text, expected);
569 }
570 END_TEST
571 
572 /* Test that a long ASCII attribute (too long to convert in one go)
573  * is correctly converted
574  */
575 START_TEST(test_long_ascii_attribute) {
576   const char *text
577       = "<?xml version='1.0' encoding='us-ascii'?>\n"
578         "<doc att='"
579         /* 64 characters per line */
580         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
581         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
582         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
583         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
584         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
585         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
586         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
587         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
588         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
589         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
590         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
591         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
592         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
593         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
594         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
595         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
596         "01234'>\n</doc>";
597   const XML_Char *expected =
598       /* 64 characters per line */
599       /* clang-format off */
600         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
601         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
602         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
603         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
604         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
605         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
606         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
607         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
608         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
609         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
610         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
611         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
612         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
613         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
614         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
615         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
616         XCS("01234");
617   /* clang-format on */
618 
619   run_attribute_check(text, expected);
620 }
621 END_TEST
622 
623 /* Regression test #1 for SF bug #653180. */
624 START_TEST(test_line_number_after_parse) {
625   const char *text = "<tag>\n"
626                      "\n"
627                      "\n</tag>";
628   XML_Size lineno;
629 
630   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
631       == XML_STATUS_ERROR)
632     xml_failure(g_parser);
633   lineno = XML_GetCurrentLineNumber(g_parser);
634   if (lineno != 4) {
635     char buffer[100];
636     snprintf(buffer, sizeof(buffer),
637              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
638     fail(buffer);
639   }
640 }
641 END_TEST
642 
643 /* Regression test #2 for SF bug #653180. */
644 START_TEST(test_column_number_after_parse) {
645   const char *text = "<tag></tag>";
646   XML_Size colno;
647 
648   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
649       == XML_STATUS_ERROR)
650     xml_failure(g_parser);
651   colno = XML_GetCurrentColumnNumber(g_parser);
652   if (colno != 11) {
653     char buffer[100];
654     snprintf(buffer, sizeof(buffer),
655              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
656     fail(buffer);
657   }
658 }
659 END_TEST
660 
661 /* Regression test #3 for SF bug #653180. */
662 START_TEST(test_line_and_column_numbers_inside_handlers) {
663   const char *text = "<a>\n"      /* Unix end-of-line */
664                      "  <b>\r\n"  /* Windows end-of-line */
665                      "    <c/>\r" /* Mac OS end-of-line */
666                      "  </b>\n"
667                      "  <d>\n"
668                      "    <f/>\n"
669                      "  </d>\n"
670                      "</a>";
671   const StructDataEntry expected[]
672       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
673          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
674          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
675          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
676          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
677   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
678   StructData storage;
679 
680   StructData_Init(&storage);
681   XML_SetUserData(g_parser, &storage);
682   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
683   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
684   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
685       == XML_STATUS_ERROR)
686     xml_failure(g_parser);
687 
688   StructData_CheckItems(&storage, expected, expected_count);
689   StructData_Dispose(&storage);
690 }
691 END_TEST
692 
693 /* Regression test #4 for SF bug #653180. */
694 START_TEST(test_line_number_after_error) {
695   const char *text = "<a>\n"
696                      "  <b>\n"
697                      "  </a>"; /* missing </b> */
698   XML_Size lineno;
699   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
700       != XML_STATUS_ERROR)
701     fail("Expected a parse error");
702 
703   lineno = XML_GetCurrentLineNumber(g_parser);
704   if (lineno != 3) {
705     char buffer[100];
706     snprintf(buffer, sizeof(buffer),
707              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
708     fail(buffer);
709   }
710 }
711 END_TEST
712 
713 /* Regression test #5 for SF bug #653180. */
714 START_TEST(test_column_number_after_error) {
715   const char *text = "<a>\n"
716                      "  <b>\n"
717                      "  </a>"; /* missing </b> */
718   XML_Size colno;
719   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
720       != XML_STATUS_ERROR)
721     fail("Expected a parse error");
722 
723   colno = XML_GetCurrentColumnNumber(g_parser);
724   if (colno != 4) {
725     char buffer[100];
726     snprintf(buffer, sizeof(buffer),
727              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
728     fail(buffer);
729   }
730 }
731 END_TEST
732 
733 /* Regression test for SF bug #478332. */
734 START_TEST(test_really_long_lines) {
735   /* This parses an input line longer than INIT_DATA_BUF_SIZE
736      characters long (defined to be 1024 in xmlparse.c).  We take a
737      really cheesy approach to building the input buffer, because
738      this avoids writing bugs in buffer-filling code.
739   */
740   const char *text
741       = "<e>"
742         /* 64 chars */
743         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
744         /* until we have at least 1024 characters on the line: */
745         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
746         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
747         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
748         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
749         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
750         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
751         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
752         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
753         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
754         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
755         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761         "</e>";
762   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
763       == XML_STATUS_ERROR)
764     xml_failure(g_parser);
765 }
766 END_TEST
767 
768 /* Test cdata processing across a buffer boundary */
769 START_TEST(test_really_long_encoded_lines) {
770   /* As above, except that we want to provoke an output buffer
771    * overflow with a non-trivial encoding.  For this we need to pass
772    * the whole cdata in one go, not byte-by-byte.
773    */
774   void *buffer;
775   const char *text
776       = "<?xml version='1.0' encoding='iso-8859-1'?>"
777         "<e>"
778         /* 64 chars */
779         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
780         /* until we have at least 1024 characters on the line: */
781         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
782         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
783         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
784         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
785         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
786         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
787         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
788         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
789         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
790         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
791         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
792         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
793         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
794         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
795         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
796         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
797         "</e>";
798   int parse_len = (int)strlen(text);
799 
800   /* Need a cdata handler to provoke the code path we want to test */
801   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
802   buffer = XML_GetBuffer(g_parser, parse_len);
803   if (buffer == NULL)
804     fail("Could not allocate parse buffer");
805   assert(buffer != NULL);
806   memcpy(buffer, text, parse_len);
807   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
808     xml_failure(g_parser);
809 }
810 END_TEST
811 
812 /*
813  * Element event tests.
814  */
815 
816 START_TEST(test_end_element_events) {
817   const char *text = "<a><b><c/></b><d><f/></d></a>";
818   const XML_Char *expected = XCS("/c/b/f/d/a");
819   CharData storage;
820 
821   CharData_Init(&storage);
822   XML_SetUserData(g_parser, &storage);
823   XML_SetEndElementHandler(g_parser, end_element_event_handler);
824   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
825       == XML_STATUS_ERROR)
826     xml_failure(g_parser);
827   CharData_CheckXMLChars(&storage, expected);
828 }
829 END_TEST
830 
831 /*
832  * Attribute tests.
833  */
834 
835 /* Helper used by the following tests; this checks any "attr" and "refs"
836    attributes to make sure whitespace has been normalized.
837 
838    Return true if whitespace has been normalized in a string, using
839    the rules for attribute value normalization.  The 'is_cdata' flag
840    is needed since CDATA attributes don't need to have multiple
841    whitespace characters collapsed to a single space, while other
842    attribute data types do.  (Section 3.3.3 of the recommendation.)
843 */
844 static int
845 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
846   int blanks = 0;
847   int at_start = 1;
848   while (*s) {
849     if (*s == XCS(' '))
850       ++blanks;
851     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
852       return 0;
853     else {
854       if (at_start) {
855         at_start = 0;
856         if (blanks && ! is_cdata)
857           /* illegal leading blanks */
858           return 0;
859       } else if (blanks > 1 && ! is_cdata)
860         return 0;
861       blanks = 0;
862     }
863     ++s;
864   }
865   if (blanks && ! is_cdata)
866     return 0;
867   return 1;
868 }
869 
870 /* Check the attribute whitespace checker: */
871 START_TEST(test_helper_is_whitespace_normalized) {
872   assert(is_whitespace_normalized(XCS("abc"), 0));
873   assert(is_whitespace_normalized(XCS("abc"), 1));
874   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
875   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
876   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
877   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
878   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
879   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
880   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
881   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
882   assert(! is_whitespace_normalized(XCS(" "), 0));
883   assert(is_whitespace_normalized(XCS(" "), 1));
884   assert(! is_whitespace_normalized(XCS("\t"), 0));
885   assert(! is_whitespace_normalized(XCS("\t"), 1));
886   assert(! is_whitespace_normalized(XCS("\n"), 0));
887   assert(! is_whitespace_normalized(XCS("\n"), 1));
888   assert(! is_whitespace_normalized(XCS("\r"), 0));
889   assert(! is_whitespace_normalized(XCS("\r"), 1));
890   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
891 }
892 END_TEST
893 
894 static void XMLCALL
895 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
896                                           const XML_Char **atts) {
897   int i;
898   UNUSED_P(userData);
899   UNUSED_P(name);
900   for (i = 0; atts[i] != NULL; i += 2) {
901     const XML_Char *attrname = atts[i];
902     const XML_Char *value = atts[i + 1];
903     if (xcstrcmp(XCS("attr"), attrname) == 0
904         || xcstrcmp(XCS("ents"), attrname) == 0
905         || xcstrcmp(XCS("refs"), attrname) == 0) {
906       if (! is_whitespace_normalized(value, 0)) {
907         char buffer[256];
908         snprintf(buffer, sizeof(buffer),
909                  "attribute value not normalized: %" XML_FMT_STR
910                  "='%" XML_FMT_STR "'",
911                  attrname, value);
912         fail(buffer);
913       }
914     }
915   }
916 }
917 
918 START_TEST(test_attr_whitespace_normalization) {
919   const char *text
920       = "<!DOCTYPE doc [\n"
921         "  <!ATTLIST doc\n"
922         "            attr NMTOKENS #REQUIRED\n"
923         "            ents ENTITIES #REQUIRED\n"
924         "            refs IDREFS   #REQUIRED>\n"
925         "]>\n"
926         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
927         "     ents=' ent-1   \t\r\n"
928         "            ent-2  ' >\n"
929         "  <e id='id-1'/>\n"
930         "  <e id='id-2'/>\n"
931         "</doc>";
932 
933   XML_SetStartElementHandler(g_parser,
934                              check_attr_contains_normalized_whitespace);
935   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
936       == XML_STATUS_ERROR)
937     xml_failure(g_parser);
938 }
939 END_TEST
940 
941 /*
942  * XML declaration tests.
943  */
944 
945 START_TEST(test_xmldecl_misplaced) {
946   expect_failure("\n"
947                  "<?xml version='1.0'?>\n"
948                  "<a/>",
949                  XML_ERROR_MISPLACED_XML_PI,
950                  "failed to report misplaced XML declaration");
951 }
952 END_TEST
953 
954 START_TEST(test_xmldecl_invalid) {
955   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
956                  "Failed to report invalid XML declaration");
957 }
958 END_TEST
959 
960 START_TEST(test_xmldecl_missing_attr) {
961   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
962                  "Failed to report missing XML declaration attribute");
963 }
964 END_TEST
965 
966 START_TEST(test_xmldecl_missing_value) {
967   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
968                  "<doc/>",
969                  XML_ERROR_XML_DECL,
970                  "Failed to report missing attribute value");
971 }
972 END_TEST
973 
974 /* Regression test for SF bug #584832. */
975 START_TEST(test_unknown_encoding_internal_entity) {
976   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
977                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
978                      "<test a='&foo;'/>";
979 
980   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
981   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
982       == XML_STATUS_ERROR)
983     xml_failure(g_parser);
984 }
985 END_TEST
986 
987 /* Test unrecognised encoding handler */
988 START_TEST(test_unrecognised_encoding_internal_entity) {
989   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
990                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
991                      "<test a='&foo;'/>";
992 
993   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
994   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
995       != XML_STATUS_ERROR)
996     fail("Unrecognised encoding not rejected");
997 }
998 END_TEST
999 
1000 /* Regression test for SF bug #620106. */
1001 START_TEST(test_ext_entity_set_encoding) {
1002   const char *text = "<!DOCTYPE doc [\n"
1003                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1004                      "]>\n"
1005                      "<doc>&en;</doc>";
1006   ExtTest test_data
1007       = {/* This text says it's an unsupported encoding, but it's really
1008             UTF-8, which we tell Expat using XML_SetEncoding().
1009          */
1010          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
1011 #ifdef XML_UNICODE
1012   const XML_Char *expected = XCS("\x00e9");
1013 #else
1014   const XML_Char *expected = XCS("\xc3\xa9");
1015 #endif
1016 
1017   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1018   run_ext_character_check(text, &test_data, expected);
1019 }
1020 END_TEST
1021 
1022 /* Test external entities with no handler */
1023 START_TEST(test_ext_entity_no_handler) {
1024   const char *text = "<!DOCTYPE doc [\n"
1025                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1026                      "]>\n"
1027                      "<doc>&en;</doc>";
1028 
1029   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1030   run_character_check(text, XCS(""));
1031 }
1032 END_TEST
1033 
1034 /* Test UTF-8 BOM is accepted */
1035 START_TEST(test_ext_entity_set_bom) {
1036   const char *text = "<!DOCTYPE doc [\n"
1037                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1038                      "]>\n"
1039                      "<doc>&en;</doc>";
1040   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1041                        "<?xml encoding='iso-8859-3'?>"
1042                        "\xC3\xA9",
1043                        XCS("utf-8"), NULL};
1044 #ifdef XML_UNICODE
1045   const XML_Char *expected = XCS("\x00e9");
1046 #else
1047   const XML_Char *expected = XCS("\xc3\xa9");
1048 #endif
1049 
1050   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1051   run_ext_character_check(text, &test_data, expected);
1052 }
1053 END_TEST
1054 
1055 /* Test that bad encodings are faulted */
1056 START_TEST(test_ext_entity_bad_encoding) {
1057   const char *text = "<!DOCTYPE doc [\n"
1058                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1059                      "]>\n"
1060                      "<doc>&en;</doc>";
1061   ExtFaults fault
1062       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1063          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1064 
1065   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1066   XML_SetUserData(g_parser, &fault);
1067   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1068                  "Bad encoding should not have been accepted");
1069 }
1070 END_TEST
1071 
1072 /* Try handing an invalid encoding to an external entity parser */
1073 START_TEST(test_ext_entity_bad_encoding_2) {
1074   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1075                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1076                      "<doc>&entity;</doc>";
1077   ExtFaults fault
1078       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1079          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1080 
1081   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1082   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1083   XML_SetUserData(g_parser, &fault);
1084   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1085                  "Bad encoding not faulted in external entity handler");
1086 }
1087 END_TEST
1088 
1089 /* Test that no error is reported for unknown entities if we don't
1090    read an external subset.  This was fixed in Expat 1.95.5.
1091 */
1092 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1093   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094                      "<doc>&entity;</doc>";
1095 
1096   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1097       == XML_STATUS_ERROR)
1098     xml_failure(g_parser);
1099 }
1100 END_TEST
1101 
1102 /* Test that an error is reported for unknown entities if we don't
1103    have an external subset.
1104 */
1105 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1106   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1107                  "Parser did not report undefined entity w/out a DTD.");
1108 }
1109 END_TEST
1110 
1111 /* Test that an error is reported for unknown entities if we don't
1112    read an external subset, but have been declared standalone.
1113 */
1114 START_TEST(test_wfc_undeclared_entity_standalone) {
1115   const char *text
1116       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1117         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1118         "<doc>&entity;</doc>";
1119 
1120   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1121                  "Parser did not report undefined entity (standalone).");
1122 }
1123 END_TEST
1124 
1125 /* Test that an error is reported for unknown entities if we have read
1126    an external subset, and standalone is true.
1127 */
1128 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1129   const char *text
1130       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1131         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1132         "<doc>&entity;</doc>";
1133   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1134 
1135   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1136   XML_SetUserData(g_parser, &test_data);
1137   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1138   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1139                  "Parser did not report undefined entity (external DTD).");
1140 }
1141 END_TEST
1142 
1143 /* Test that external entity handling is not done if the parsing flag
1144  * is set to UNLESS_STANDALONE
1145  */
1146 START_TEST(test_entity_with_external_subset_unless_standalone) {
1147   const char *text
1148       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1149         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1150         "<doc>&entity;</doc>";
1151   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1152 
1153   XML_SetParamEntityParsing(g_parser,
1154                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1155   XML_SetUserData(g_parser, &test_data);
1156   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1157   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1158                  "Parser did not report undefined entity");
1159 }
1160 END_TEST
1161 
1162 /* Test that no error is reported for unknown entities if we have read
1163    an external subset, and standalone is false.
1164 */
1165 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1166   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1167                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1168                      "<doc>&entity;</doc>";
1169   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1170 
1171   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1172   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1173   run_ext_character_check(text, &test_data, XCS(""));
1174 }
1175 END_TEST
1176 
1177 /* Test that an error is reported if our NotStandalone handler fails */
1178 START_TEST(test_not_standalone_handler_reject) {
1179   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1180                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1181                      "<doc>&entity;</doc>";
1182   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1183 
1184   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1185   XML_SetUserData(g_parser, &test_data);
1186   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1187   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1188   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1189                  "NotStandalone handler failed to reject");
1190 
1191   /* Try again but without external entity handling */
1192   XML_ParserReset(g_parser, NULL);
1193   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1194   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1195                  "NotStandalone handler failed to reject");
1196 }
1197 END_TEST
1198 
1199 /* Test that no error is reported if our NotStandalone handler succeeds */
1200 START_TEST(test_not_standalone_handler_accept) {
1201   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1202                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1203                      "<doc>&entity;</doc>";
1204   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1205 
1206   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1207   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1208   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1209   run_ext_character_check(text, &test_data, XCS(""));
1210 
1211   /* Repeat without the external entity handler */
1212   XML_ParserReset(g_parser, NULL);
1213   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1214   run_character_check(text, XCS(""));
1215 }
1216 END_TEST
1217 
1218 START_TEST(test_entity_start_tag_level_greater_than_one) {
1219   const char *const text = "<!DOCTYPE t1 [\n"
1220                            "  <!ENTITY e1 'hello'>\n"
1221                            "]>\n"
1222                            "<t1>\n"
1223                            "  <t2>&e1;</t2>\n"
1224                            "</t1>\n";
1225 
1226   XML_Parser parser = XML_ParserCreate(NULL);
1227   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
1228                                       /*isFinal*/ XML_TRUE)
1229               == XML_STATUS_OK);
1230   XML_ParserFree(parser);
1231 }
1232 END_TEST
1233 
1234 START_TEST(test_wfc_no_recursive_entity_refs) {
1235   const char *text = "<!DOCTYPE doc [\n"
1236                      "  <!ENTITY entity '&#38;entity;'>\n"
1237                      "]>\n"
1238                      "<doc>&entity;</doc>";
1239 
1240   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1241                  "Parser did not report recursive entity reference.");
1242 }
1243 END_TEST
1244 
1245 START_TEST(test_no_indirectly_recursive_entity_refs) {
1246   struct TestCase {
1247     const char *doc;
1248     bool usesParameterEntities;
1249   };
1250 
1251   const struct TestCase cases[] = {
1252       // general entity + character data
1253       {"<!DOCTYPE a [\n"
1254        "  <!ENTITY e1 '&e2;'>\n"
1255        "  <!ENTITY e2 '&e1;'>\n"
1256        "]><a>&e2;</a>\n",
1257        false},
1258 
1259       // general entity + attribute value
1260       {"<!DOCTYPE a [\n"
1261        "  <!ENTITY e1 '&e2;'>\n"
1262        "  <!ENTITY e2 '&e1;'>\n"
1263        "]><a k1='&e2;' />\n",
1264        false},
1265 
1266       // parameter entity
1267       {"<!DOCTYPE doc [\n"
1268        "  <!ENTITY % p1 '&#37;p2;'>\n"
1269        "  <!ENTITY % p2 '&#37;p1;'>\n"
1270        "  <!ENTITY % define_g \"<!ENTITY g '&#37;p2;'>\">\n"
1271        "  %define_g;\n"
1272        "]>\n"
1273        "<doc/>\n",
1274        true},
1275   };
1276   const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
1277 
1278   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1279     for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
1280          j++) {
1281       const XML_Bool reset_wanted = reset_or_not[j];
1282       const char *const doc = cases[i].doc;
1283       const bool usesParameterEntities = cases[i].usesParameterEntities;
1284 
1285       set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
1286 
1287 #ifdef XML_DTD // both GE and DTD
1288       const bool rejection_expected = true;
1289 #elif XML_GE == 1 // GE but not DTD
1290       const bool rejection_expected = ! usesParameterEntities;
1291 #else             // neither DTD nor GE
1292       const bool rejection_expected = false;
1293 #endif
1294 
1295       XML_Parser parser = XML_ParserCreate(NULL);
1296 
1297 #ifdef XML_DTD
1298       if (usesParameterEntities) {
1299         assert_true(
1300             XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
1301             == 1);
1302       }
1303 #else
1304       UNUSED_P(usesParameterEntities);
1305 #endif // XML_DTD
1306 
1307       const enum XML_Status status
1308           = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
1309                                     /*isFinal*/ XML_TRUE);
1310 
1311       if (rejection_expected) {
1312         assert_true(status == XML_STATUS_ERROR);
1313         assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
1314       } else {
1315         assert_true(status == XML_STATUS_OK);
1316       }
1317 
1318       if (reset_wanted) {
1319         // This covers free'ing of (eventually) all three open entity lists by
1320         // XML_ParserReset.
1321         XML_ParserReset(parser, NULL);
1322       }
1323 
1324       // This covers free'ing of (eventually) all three open entity lists by
1325       // XML_ParserFree (unless XML_ParserReset has already done that above).
1326       XML_ParserFree(parser);
1327     }
1328   }
1329 }
1330 END_TEST
1331 
1332 START_TEST(test_recursive_external_parameter_entity_2) {
1333   struct TestCase {
1334     const char *doc;
1335     enum XML_Status expectedStatus;
1336   };
1337 
1338   struct TestCase cases[] = {
1339       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1340       {"<!ENTITY % p1 '%p1;'>"
1341        "<!ENTITY % p1 'first declaration wins'>",
1342        XML_STATUS_ERROR},
1343       {"<!ENTITY % p1 'first declaration wins'>"
1344        "<!ENTITY % p1 '%p1;'>",
1345        XML_STATUS_OK},
1346       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1347   };
1348 
1349   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1350     const char *const doc = cases[i].doc;
1351     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1352     set_subtest("%s", doc);
1353 
1354     XML_Parser parser = XML_ParserCreate(NULL);
1355     assert_true(parser != NULL);
1356 
1357     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1358     assert_true(ext_parser != NULL);
1359 
1360     const enum XML_Status actualStatus
1361         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1362 
1363     assert_true(actualStatus == expectedStatus);
1364     if (actualStatus != XML_STATUS_OK) {
1365       assert_true(XML_GetErrorCode(ext_parser)
1366                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1367     }
1368 
1369     XML_ParserFree(ext_parser);
1370     XML_ParserFree(parser);
1371   }
1372 }
1373 END_TEST
1374 
1375 /* Test incomplete external entities are faulted */
1376 START_TEST(test_ext_entity_invalid_parse) {
1377   const char *text = "<!DOCTYPE doc [\n"
1378                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1379                      "]>\n"
1380                      "<doc>&en;</doc>";
1381   const ExtFaults faults[]
1382       = {{"<", "Incomplete element declaration not faulted", NULL,
1383           XML_ERROR_UNCLOSED_TOKEN},
1384          {"<\xe2\x82", /* First two bytes of a three-byte char */
1385           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1386          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1387           XML_ERROR_PARTIAL_CHAR},
1388          {NULL, NULL, NULL, XML_ERROR_NONE}};
1389   const ExtFaults *fault = faults;
1390 
1391   for (; fault->parse_text != NULL; fault++) {
1392     set_subtest("\"%s\"", fault->parse_text);
1393     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1394     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1395     XML_SetUserData(g_parser, (void *)fault);
1396     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1397                    "Parser did not report external entity error");
1398     XML_ParserReset(g_parser, NULL);
1399   }
1400 }
1401 END_TEST
1402 
1403 /* Regression test for SF bug #483514. */
1404 START_TEST(test_dtd_default_handling) {
1405   const char *text = "<!DOCTYPE doc [\n"
1406                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1407                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1408                      "<!ELEMENT doc EMPTY>\n"
1409                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1410                      "<?pi in dtd?>\n"
1411                      "<!--comment in dtd-->\n"
1412                      "]><doc/>";
1413 
1414   XML_SetDefaultHandler(g_parser, accumulate_characters);
1415   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1416   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1417   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1418   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1419   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1420   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1421   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1422   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1423   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1424   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1425   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1426 }
1427 END_TEST
1428 
1429 /* Test handling of attribute declarations */
1430 START_TEST(test_dtd_attr_handling) {
1431   const char *prolog = "<!DOCTYPE doc [\n"
1432                        "<!ELEMENT doc EMPTY>\n";
1433   AttTest attr_data[]
1434       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1435           "]>"
1436           "<doc a='two'/>",
1437           XCS("doc"), XCS("a"),
1438           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1439           NULL, XML_TRUE},
1440          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1441           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1442           "]>"
1443           "<doc/>",
1444           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1445          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1446           "]>"
1447           "<doc/>",
1448           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1449          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1450           "]>"
1451           "<doc/>",
1452           XCS("doc"), XCS("a"), XCS("CDATA"),
1453 #ifdef XML_UNICODE
1454           XCS("\x06f2"),
1455 #else
1456           XCS("\xdb\xb2"),
1457 #endif
1458           XML_FALSE},
1459          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1460   AttTest *test;
1461 
1462   for (test = attr_data; test->definition != NULL; test++) {
1463     set_subtest("%s", test->definition);
1464     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1465     XML_SetUserData(g_parser, test);
1466     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1467                                 XML_FALSE)
1468         == XML_STATUS_ERROR)
1469       xml_failure(g_parser);
1470     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1471                                 (int)strlen(test->definition), XML_TRUE)
1472         == XML_STATUS_ERROR)
1473       xml_failure(g_parser);
1474     XML_ParserReset(g_parser, NULL);
1475   }
1476 }
1477 END_TEST
1478 
1479 /* See related SF bug #673791.
1480    When namespace processing is enabled, setting the namespace URI for
1481    a prefix is not allowed; this test ensures that it *is* allowed
1482    when namespace processing is not enabled.
1483    (See Namespaces in XML, section 2.)
1484 */
1485 START_TEST(test_empty_ns_without_namespaces) {
1486   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1487                      "  <e xmlns:prefix=''/>\n"
1488                      "</doc>";
1489 
1490   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1491       == XML_STATUS_ERROR)
1492     xml_failure(g_parser);
1493 }
1494 END_TEST
1495 
1496 /* Regression test for SF bug #824420.
1497    Checks that an xmlns:prefix attribute set in an attribute's default
1498    value isn't misinterpreted.
1499 */
1500 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1501   const char *text = "<!DOCTYPE e:element [\n"
1502                      "  <!ATTLIST e:element\n"
1503                      "    xmlns:e CDATA 'http://example.org/'>\n"
1504                      "      ]>\n"
1505                      "<e:element/>";
1506 
1507   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1508       == XML_STATUS_ERROR)
1509     xml_failure(g_parser);
1510 }
1511 END_TEST
1512 
1513 /* Regression test for SF bug #1515266: missing check of stopped
1514    parser in doContext() 'for' loop. */
1515 START_TEST(test_stop_parser_between_char_data_calls) {
1516   /* The sample data must be big enough that there are two calls to
1517      the character data handler from within the inner "for" loop of
1518      the XML_TOK_DATA_CHARS case in doContent(), and the character
1519      handler must stop the parser and clear the character data
1520      handler.
1521   */
1522   const char *text = long_character_data_text;
1523 
1524   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1525   g_resumable = XML_FALSE;
1526   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1527       != XML_STATUS_ERROR)
1528     xml_failure(g_parser);
1529   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1530     xml_failure(g_parser);
1531 }
1532 END_TEST
1533 
1534 /* Regression test for SF bug #1515266: missing check of stopped
1535    parser in doContext() 'for' loop. */
1536 START_TEST(test_suspend_parser_between_char_data_calls) {
1537   /* The sample data must be big enough that there are two calls to
1538      the character data handler from within the inner "for" loop of
1539      the XML_TOK_DATA_CHARS case in doContent(), and the character
1540      handler must stop the parser and clear the character data
1541      handler.
1542   */
1543   const char *text = long_character_data_text;
1544 
1545   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1546   g_resumable = XML_TRUE;
1547   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1548   // we won't know exactly how much input we actually managed to give Expat.
1549   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1550       != XML_STATUS_SUSPENDED)
1551     xml_failure(g_parser);
1552   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1553     xml_failure(g_parser);
1554   /* Try parsing directly */
1555   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1556       != XML_STATUS_ERROR)
1557     fail("Attempt to continue parse while suspended not faulted");
1558   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1559     fail("Suspended parse not faulted with correct error");
1560 }
1561 END_TEST
1562 
1563 /* Test repeated calls to XML_StopParser are handled correctly */
1564 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1565   const char *text = long_character_data_text;
1566 
1567   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1568   g_resumable = XML_FALSE;
1569   g_abortable = XML_FALSE;
1570   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1571       != XML_STATUS_ERROR)
1572     fail("Failed to double-stop parser");
1573 
1574   XML_ParserReset(g_parser, NULL);
1575   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1576   g_resumable = XML_TRUE;
1577   g_abortable = XML_FALSE;
1578   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1579   // we won't know exactly how much input we actually managed to give Expat.
1580   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1581       != XML_STATUS_SUSPENDED)
1582     fail("Failed to double-suspend parser");
1583 
1584   XML_ParserReset(g_parser, NULL);
1585   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1586   g_resumable = XML_TRUE;
1587   g_abortable = XML_TRUE;
1588   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1589       != XML_STATUS_ERROR)
1590     fail("Failed to suspend-abort parser");
1591 }
1592 END_TEST
1593 
1594 START_TEST(test_good_cdata_ascii) {
1595   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1596   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1597 
1598   CharData storage;
1599   CharData_Init(&storage);
1600   XML_SetUserData(g_parser, &storage);
1601   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1602   /* Add start and end handlers for coverage */
1603   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1604   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1605 
1606   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1607       == XML_STATUS_ERROR)
1608     xml_failure(g_parser);
1609   CharData_CheckXMLChars(&storage, expected);
1610 
1611   /* Try again, this time with a default handler */
1612   XML_ParserReset(g_parser, NULL);
1613   CharData_Init(&storage);
1614   XML_SetUserData(g_parser, &storage);
1615   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1616   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1617 
1618   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1619       == XML_STATUS_ERROR)
1620     xml_failure(g_parser);
1621   CharData_CheckXMLChars(&storage, expected);
1622 }
1623 END_TEST
1624 
1625 START_TEST(test_good_cdata_utf16) {
1626   /* Test data is:
1627    *   <?xml version='1.0' encoding='utf-16'?>
1628    *   <a><![CDATA[hello]]></a>
1629    */
1630   const char text[]
1631       = "\0<\0?\0x\0m\0l\0"
1632         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1633         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1634         "1\0"
1635         "6\0'"
1636         "\0?\0>\0\n"
1637         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1638   const XML_Char *expected = XCS("hello");
1639 
1640   CharData storage;
1641   CharData_Init(&storage);
1642   XML_SetUserData(g_parser, &storage);
1643   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1644 
1645   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1646       == XML_STATUS_ERROR)
1647     xml_failure(g_parser);
1648   CharData_CheckXMLChars(&storage, expected);
1649 }
1650 END_TEST
1651 
1652 START_TEST(test_good_cdata_utf16_le) {
1653   /* Test data is:
1654    *   <?xml version='1.0' encoding='utf-16'?>
1655    *   <a><![CDATA[hello]]></a>
1656    */
1657   const char text[]
1658       = "<\0?\0x\0m\0l\0"
1659         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1660         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1661         "1\0"
1662         "6\0'"
1663         "\0?\0>\0\n"
1664         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1665   const XML_Char *expected = XCS("hello");
1666 
1667   CharData storage;
1668   CharData_Init(&storage);
1669   XML_SetUserData(g_parser, &storage);
1670   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1671 
1672   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1673       == XML_STATUS_ERROR)
1674     xml_failure(g_parser);
1675   CharData_CheckXMLChars(&storage, expected);
1676 }
1677 END_TEST
1678 
1679 /* Test UTF16 conversion of a long cdata string */
1680 
1681 /* 16 characters: handy macro to reduce visual clutter */
1682 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1683 
1684 START_TEST(test_long_cdata_utf16) {
1685   /* Test data is:
1686    * <?xlm version='1.0' encoding='utf-16'?>
1687    * <a><![CDATA[
1688    * ABCDEFGHIJKLMNOP
1689    * ]]></a>
1690    */
1691   const char text[]
1692       = "\0<\0?\0x\0m\0l\0 "
1693         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1694         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1695         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1696       /* 64 characters per line */
1697       /* clang-format off */
1698         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1699         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1700         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1701         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1702         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1703         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1704         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1705         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1706         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1707         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1708         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1709         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1710         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1711         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1712         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1713         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1714         A_TO_P_IN_UTF16
1715         /* clang-format on */
1716         "\0]\0]\0>\0<\0/\0a\0>";
1717   const XML_Char *expected =
1718       /* clang-format off */
1719         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1720         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1721         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1722         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1723         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1724         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1725         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1726         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1727         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1728         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1729         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1730         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1731         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1732         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1733         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1734         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1735         XCS("ABCDEFGHIJKLMNOP");
1736   /* clang-format on */
1737   CharData storage;
1738   void *buffer;
1739 
1740   CharData_Init(&storage);
1741   XML_SetUserData(g_parser, &storage);
1742   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1743   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1744   if (buffer == NULL)
1745     fail("Could not allocate parse buffer");
1746   assert(buffer != NULL);
1747   memcpy(buffer, text, sizeof(text) - 1);
1748   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1749     xml_failure(g_parser);
1750   CharData_CheckXMLChars(&storage, expected);
1751 }
1752 END_TEST
1753 
1754 /* Test handling of multiple unit UTF-16 characters */
1755 START_TEST(test_multichar_cdata_utf16) {
1756   /* Test data is:
1757    *   <?xml version='1.0' encoding='utf-16'?>
1758    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1759    *
1760    * where {MINIM} is U+1d15e (a minim or half-note)
1761    *   UTF-16: 0xd834 0xdd5e
1762    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1763    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1764    *   UTF-16: 0xd834 0xdd5f
1765    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1766    */
1767   const char text[] = "\0<\0?\0x\0m\0l\0"
1768                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1769                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1770                       "1\0"
1771                       "6\0'"
1772                       "\0?\0>\0\n"
1773                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1774                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1775                       "\0]\0]\0>\0<\0/\0a\0>";
1776 #ifdef XML_UNICODE
1777   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1778 #else
1779   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1780 #endif
1781   CharData storage;
1782 
1783   CharData_Init(&storage);
1784   XML_SetUserData(g_parser, &storage);
1785   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1786 
1787   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1788       == XML_STATUS_ERROR)
1789     xml_failure(g_parser);
1790   CharData_CheckXMLChars(&storage, expected);
1791 }
1792 END_TEST
1793 
1794 /* Test that an element name with a UTF-16 surrogate pair is rejected */
1795 START_TEST(test_utf16_bad_surrogate_pair) {
1796   /* Test data is:
1797    *   <?xml version='1.0' encoding='utf-16'?>
1798    *   <a><![CDATA[{BADLINB}]]></a>
1799    *
1800    * where {BADLINB} is U+10000 (the first Linear B character)
1801    * with the UTF-16 surrogate pair in the wrong order, i.e.
1802    *   0xdc00 0xd800
1803    */
1804   const char text[] = "\0<\0?\0x\0m\0l\0"
1805                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1806                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1807                       "1\0"
1808                       "6\0'"
1809                       "\0?\0>\0\n"
1810                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1811                       "\xdc\x00\xd8\x00"
1812                       "\0]\0]\0>\0<\0/\0a\0>";
1813 
1814   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1815       != XML_STATUS_ERROR)
1816     fail("Reversed UTF-16 surrogate pair not faulted");
1817   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1818     xml_failure(g_parser);
1819 }
1820 END_TEST
1821 
1822 START_TEST(test_bad_cdata) {
1823   struct CaseData {
1824     const char *text;
1825     enum XML_Error expectedError;
1826   };
1827 
1828   struct CaseData cases[]
1829       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1830          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1831          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1832          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1833          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1834          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1835          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1836          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1837 
1838          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1839          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1840          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1841 
1842          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1843          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1844          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1845          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1846          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1847          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1848          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1849 
1850          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1851          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1852          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1853 
1854   size_t i = 0;
1855   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1856     set_subtest("%s", cases[i].text);
1857     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1858         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1859     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1860 
1861     assert(actualStatus == XML_STATUS_ERROR);
1862 
1863     if (actualError != cases[i].expectedError) {
1864       char message[100];
1865       snprintf(message, sizeof(message),
1866                "Expected error %d but got error %d for case %u: \"%s\"\n",
1867                cases[i].expectedError, actualError, (unsigned int)i + 1,
1868                cases[i].text);
1869       fail(message);
1870     }
1871 
1872     XML_ParserReset(g_parser, NULL);
1873   }
1874 }
1875 END_TEST
1876 
1877 /* Test failures in UTF-16 CDATA */
1878 START_TEST(test_bad_cdata_utf16) {
1879   struct CaseData {
1880     size_t text_bytes;
1881     const char *text;
1882     enum XML_Error expected_error;
1883   };
1884 
1885   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1886                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1887                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1888                         "1\0"
1889                         "6\0'"
1890                         "\0?\0>\0\n"
1891                         "\0<\0a\0>";
1892   struct CaseData cases[] = {
1893       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1894       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1895       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1896       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1897       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1898       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1899       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1900       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1901       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1902       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1903       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1904       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1905       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1906       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1907       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1908       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1909       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1910       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1911       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1912       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1913       /* Now add a four-byte UTF-16 character */
1914       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1915        XML_ERROR_UNCLOSED_CDATA_SECTION},
1916       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1917       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1918        XML_ERROR_PARTIAL_CHAR},
1919       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1920        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1921   size_t i;
1922 
1923   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1924     set_subtest("case %lu", (long unsigned)(i + 1));
1925     enum XML_Status actual_status;
1926     enum XML_Error actual_error;
1927 
1928     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1929                                 XML_FALSE)
1930         == XML_STATUS_ERROR)
1931       xml_failure(g_parser);
1932     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1933                                             (int)cases[i].text_bytes, XML_TRUE);
1934     assert(actual_status == XML_STATUS_ERROR);
1935     actual_error = XML_GetErrorCode(g_parser);
1936     if (actual_error != cases[i].expected_error) {
1937       char message[1024];
1938 
1939       snprintf(message, sizeof(message),
1940                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1941                ") for case %lu\n",
1942                cases[i].expected_error,
1943                XML_ErrorString(cases[i].expected_error), actual_error,
1944                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1945       fail(message);
1946     }
1947     XML_ParserReset(g_parser, NULL);
1948   }
1949 }
1950 END_TEST
1951 
1952 /* Test stopping the parser in cdata handler */
1953 START_TEST(test_stop_parser_between_cdata_calls) {
1954   const char *text = long_cdata_text;
1955 
1956   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1957   g_resumable = XML_FALSE;
1958   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1959 }
1960 END_TEST
1961 
1962 /* Test suspending the parser in cdata handler */
1963 START_TEST(test_suspend_parser_between_cdata_calls) {
1964   if (g_chunkSize != 0) {
1965     // this test does not use SINGLE_BYTES, because of suspension
1966     return;
1967   }
1968 
1969   const char *text = long_cdata_text;
1970   enum XML_Status result;
1971 
1972   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1973   g_resumable = XML_TRUE;
1974   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1975   // we won't know exactly how much input we actually managed to give Expat.
1976   result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
1977   if (result != XML_STATUS_SUSPENDED) {
1978     if (result == XML_STATUS_ERROR)
1979       xml_failure(g_parser);
1980     fail("Parse not suspended in CDATA handler");
1981   }
1982   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1983     xml_failure(g_parser);
1984 }
1985 END_TEST
1986 
1987 /* Test memory allocation functions */
1988 START_TEST(test_memory_allocation) {
1989   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1990   char *p;
1991 
1992   if (buffer == NULL) {
1993     fail("Allocation failed");
1994   } else {
1995     /* Try writing to memory; some OSes try to cheat! */
1996     buffer[0] = 'T';
1997     buffer[1] = 'E';
1998     buffer[2] = 'S';
1999     buffer[3] = 'T';
2000     buffer[4] = '\0';
2001     if (strcmp(buffer, "TEST") != 0) {
2002       fail("Memory not writable");
2003     } else {
2004       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
2005       if (p == NULL) {
2006         fail("Reallocation failed");
2007       } else {
2008         /* Write again, just to be sure */
2009         buffer = p;
2010         buffer[0] = 'V';
2011         if (strcmp(buffer, "VEST") != 0) {
2012           fail("Reallocated memory not writable");
2013         }
2014       }
2015     }
2016     XML_MemFree(g_parser, buffer);
2017   }
2018 }
2019 END_TEST
2020 
2021 /* Test XML_DefaultCurrent() passes handling on correctly */
2022 START_TEST(test_default_current) {
2023   const char *text = "<doc>hell]</doc>";
2024   const char *entity_text = "<!DOCTYPE doc [\n"
2025                             "<!ENTITY entity '&#37;'>\n"
2026                             "]>\n"
2027                             "<doc>&entity;</doc>";
2028 
2029   set_subtest("with defaulting");
2030   {
2031     struct handler_record_list storage;
2032     storage.count = 0;
2033     XML_SetDefaultHandler(g_parser, record_default_handler);
2034     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2035     XML_SetUserData(g_parser, &storage);
2036     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2037         == XML_STATUS_ERROR)
2038       xml_failure(g_parser);
2039     int i = 0;
2040     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2041     // we should have gotten one or more cdata callbacks, totaling 5 chars
2042     int cdata_len_remaining = 5;
2043     while (cdata_len_remaining > 0) {
2044       const struct handler_record_entry *c_entry
2045           = handler_record_get(&storage, i++);
2046       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
2047       assert_true(c_entry->arg > 0);
2048       assert_true(c_entry->arg <= cdata_len_remaining);
2049       cdata_len_remaining -= c_entry->arg;
2050       // default handler must follow, with the exact same len argument.
2051       assert_record_handler_called(&storage, i++, "record_default_handler",
2052                                    c_entry->arg);
2053     }
2054     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2055     assert_true(storage.count == i);
2056   }
2057 
2058   /* Again, without the defaulting */
2059   set_subtest("no defaulting");
2060   {
2061     struct handler_record_list storage;
2062     storage.count = 0;
2063     XML_ParserReset(g_parser, NULL);
2064     XML_SetDefaultHandler(g_parser, record_default_handler);
2065     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2066     XML_SetUserData(g_parser, &storage);
2067     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2068         == XML_STATUS_ERROR)
2069       xml_failure(g_parser);
2070     int i = 0;
2071     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2072     // we should have gotten one or more cdata callbacks, totaling 5 chars
2073     int cdata_len_remaining = 5;
2074     while (cdata_len_remaining > 0) {
2075       const struct handler_record_entry *c_entry
2076           = handler_record_get(&storage, i++);
2077       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
2078       assert_true(c_entry->arg > 0);
2079       assert_true(c_entry->arg <= cdata_len_remaining);
2080       cdata_len_remaining -= c_entry->arg;
2081     }
2082     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2083     assert_true(storage.count == i);
2084   }
2085 
2086   /* Now with an internal entity to complicate matters */
2087   set_subtest("with internal entity");
2088   {
2089     struct handler_record_list storage;
2090     storage.count = 0;
2091     XML_ParserReset(g_parser, NULL);
2092     XML_SetDefaultHandler(g_parser, record_default_handler);
2093     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2094     XML_SetUserData(g_parser, &storage);
2095     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2096                                 XML_TRUE)
2097         == XML_STATUS_ERROR)
2098       xml_failure(g_parser);
2099     /* The default handler suppresses the entity */
2100     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2101     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2102     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2103     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2104     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2105     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2106     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2107     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2108     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2109     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2110     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2111     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2112     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2113     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2114     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2115     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2116     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2117     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
2118     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2119     assert_true(storage.count == 19);
2120   }
2121 
2122   /* Again, with a skip handler */
2123   set_subtest("with skip handler");
2124   {
2125     struct handler_record_list storage;
2126     storage.count = 0;
2127     XML_ParserReset(g_parser, NULL);
2128     XML_SetDefaultHandler(g_parser, record_default_handler);
2129     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2130     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
2131     XML_SetUserData(g_parser, &storage);
2132     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2133                                 XML_TRUE)
2134         == XML_STATUS_ERROR)
2135       xml_failure(g_parser);
2136     /* The default handler suppresses the entity */
2137     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2138     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2139     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2140     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2141     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2142     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2143     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2144     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2145     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2146     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2147     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2148     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2149     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2150     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2151     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2152     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2153     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2154     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2155     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2156     assert_true(storage.count == 19);
2157   }
2158 
2159   /* This time, allow the entity through */
2160   set_subtest("allow entity");
2161   {
2162     struct handler_record_list storage;
2163     storage.count = 0;
2164     XML_ParserReset(g_parser, NULL);
2165     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2166     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2167     XML_SetUserData(g_parser, &storage);
2168     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2169                                 XML_TRUE)
2170         == XML_STATUS_ERROR)
2171       xml_failure(g_parser);
2172     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2173     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2174     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2175     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2176     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2177     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2178     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2179     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2180     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2181     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2182     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2183     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2184     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2185     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2186     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2187     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2188     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2189     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2190     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2191     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2192     assert_true(storage.count == 20);
2193   }
2194 
2195   /* Finally, without passing the cdata to the default handler */
2196   set_subtest("not passing cdata");
2197   {
2198     struct handler_record_list storage;
2199     storage.count = 0;
2200     XML_ParserReset(g_parser, NULL);
2201     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2202     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2203     XML_SetUserData(g_parser, &storage);
2204     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2205                                 XML_TRUE)
2206         == XML_STATUS_ERROR)
2207       xml_failure(g_parser);
2208     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2209     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2210     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2211     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2212     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2213     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2214     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2215     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2216     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2217     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2218     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2219     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2220     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2221     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2222     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2223     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2224     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2225     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2226                                  1);
2227     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2228     assert_true(storage.count == 19);
2229   }
2230 }
2231 END_TEST
2232 
2233 /* Test DTD element parsing code paths */
2234 START_TEST(test_dtd_elements) {
2235   const char *text = "<!DOCTYPE doc [\n"
2236                      "<!ELEMENT doc (chapter)>\n"
2237                      "<!ELEMENT chapter (#PCDATA)>\n"
2238                      "]>\n"
2239                      "<doc><chapter>Wombats are go</chapter></doc>";
2240 
2241   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2242   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2243       == XML_STATUS_ERROR)
2244     xml_failure(g_parser);
2245 }
2246 END_TEST
2247 
2248 static void XMLCALL
2249 element_decl_check_model(void *userData, const XML_Char *name,
2250                          XML_Content *model) {
2251   UNUSED_P(userData);
2252   uint32_t errorFlags = 0;
2253 
2254   /* Expected model array structure is this:
2255    * [0] (type 6, quant 0)
2256    *   [1] (type 5, quant 0)
2257    *     [3] (type 4, quant 0, name "bar")
2258    *     [4] (type 4, quant 0, name "foo")
2259    *     [5] (type 4, quant 3, name "xyz")
2260    *   [2] (type 4, quant 2, name "zebra")
2261    */
2262   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2263   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2264 
2265   if (model != NULL) {
2266     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2267     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2268     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2269     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2270     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2271 
2272     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2273     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2274     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2275     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2276     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2277 
2278     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2279     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2280     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2281     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2282     errorFlags
2283         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2284 
2285     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2286     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2287     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2288     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2289     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2290 
2291     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2292     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2293     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2294     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2295     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2296 
2297     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2298     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2299     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2300     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2301     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2302   }
2303 
2304   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2305   XML_FreeContentModel(g_parser, model);
2306 }
2307 
2308 START_TEST(test_dtd_elements_nesting) {
2309   // Payload inspired by a test in Perl's XML::Parser
2310   const char *text = "<!DOCTYPE foo [\n"
2311                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2312                      "]>\n"
2313                      "<foo/>";
2314 
2315   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2316 
2317   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2318   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2319       == XML_STATUS_ERROR)
2320     xml_failure(g_parser);
2321 
2322   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2323     fail("Element declaration model regression detected");
2324 }
2325 END_TEST
2326 
2327 /* Test foreign DTD handling */
2328 START_TEST(test_set_foreign_dtd) {
2329   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2330   const char *text2 = "<doc>&entity;</doc>";
2331   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2332 
2333   /* Check hash salt is passed through too */
2334   XML_SetHashSalt(g_parser, 0x12345678);
2335   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2336   XML_SetUserData(g_parser, &test_data);
2337   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2338   /* Add a default handler to exercise more code paths */
2339   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2340   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2341     fail("Could not set foreign DTD");
2342   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2343       == XML_STATUS_ERROR)
2344     xml_failure(g_parser);
2345 
2346   /* Ensure that trying to set the DTD after parsing has started
2347    * is faulted, even if it's the same setting.
2348    */
2349   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2350       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2351     fail("Failed to reject late foreign DTD setting");
2352   /* Ditto for the hash salt */
2353   if (XML_SetHashSalt(g_parser, 0x23456789))
2354     fail("Failed to reject late hash salt change");
2355 
2356   /* Now finish the parse */
2357   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2358       == XML_STATUS_ERROR)
2359     xml_failure(g_parser);
2360 }
2361 END_TEST
2362 
2363 /* Test foreign DTD handling with a failing NotStandalone handler */
2364 START_TEST(test_foreign_dtd_not_standalone) {
2365   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2366                      "<doc>&entity;</doc>";
2367   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2368 
2369   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2370   XML_SetUserData(g_parser, &test_data);
2371   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2372   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2373   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2374     fail("Could not set foreign DTD");
2375   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2376                  "NotStandalonehandler failed to reject");
2377 }
2378 END_TEST
2379 
2380 /* Test invalid character in a foreign DTD is faulted */
2381 START_TEST(test_invalid_foreign_dtd) {
2382   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2383                      "<doc>&entity;</doc>";
2384   ExtFaults test_data
2385       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2386 
2387   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2388   XML_SetUserData(g_parser, &test_data);
2389   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2390   XML_UseForeignDTD(g_parser, XML_TRUE);
2391   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2392                  "Bad DTD should not have been accepted");
2393 }
2394 END_TEST
2395 
2396 /* Test foreign DTD use with a doctype */
2397 START_TEST(test_foreign_dtd_with_doctype) {
2398   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2399                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2400   const char *text2 = "<doc>&entity;</doc>";
2401   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2402 
2403   /* Check hash salt is passed through too */
2404   XML_SetHashSalt(g_parser, 0x12345678);
2405   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2406   XML_SetUserData(g_parser, &test_data);
2407   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2408   /* Add a default handler to exercise more code paths */
2409   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2410   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2411     fail("Could not set foreign DTD");
2412   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2413       == XML_STATUS_ERROR)
2414     xml_failure(g_parser);
2415 
2416   /* Ensure that trying to set the DTD after parsing has started
2417    * is faulted, even if it's the same setting.
2418    */
2419   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2420       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2421     fail("Failed to reject late foreign DTD setting");
2422   /* Ditto for the hash salt */
2423   if (XML_SetHashSalt(g_parser, 0x23456789))
2424     fail("Failed to reject late hash salt change");
2425 
2426   /* Now finish the parse */
2427   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2428       == XML_STATUS_ERROR)
2429     xml_failure(g_parser);
2430 }
2431 END_TEST
2432 
2433 /* Test XML_UseForeignDTD with no external subset present */
2434 START_TEST(test_foreign_dtd_without_external_subset) {
2435   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2436                      "<doc>&foo;</doc>";
2437 
2438   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2439   XML_SetUserData(g_parser, NULL);
2440   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2441   XML_UseForeignDTD(g_parser, XML_TRUE);
2442   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2443       == XML_STATUS_ERROR)
2444     xml_failure(g_parser);
2445 }
2446 END_TEST
2447 
2448 START_TEST(test_empty_foreign_dtd) {
2449   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2450                      "<doc>&entity;</doc>";
2451 
2452   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2453   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2454   XML_UseForeignDTD(g_parser, XML_TRUE);
2455   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2456                  "Undefined entity not faulted");
2457 }
2458 END_TEST
2459 
2460 /* Test XML Base is set and unset appropriately */
2461 START_TEST(test_set_base) {
2462   const XML_Char *old_base;
2463   const XML_Char *new_base = XCS("/local/file/name.xml");
2464 
2465   old_base = XML_GetBase(g_parser);
2466   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2467     fail("Unable to set base");
2468   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2469     fail("Base setting not correct");
2470   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2471     fail("Unable to NULL base");
2472   if (XML_GetBase(g_parser) != NULL)
2473     fail("Base setting not nulled");
2474   XML_SetBase(g_parser, old_base);
2475 }
2476 END_TEST
2477 
2478 /* Test attribute counts, indexing, etc */
2479 START_TEST(test_attributes) {
2480   const char *text = "<!DOCTYPE doc [\n"
2481                      "<!ELEMENT doc (tag)>\n"
2482                      "<!ATTLIST doc id ID #REQUIRED>\n"
2483                      "]>"
2484                      "<doc a='1' id='one' b='2'>"
2485                      "<tag c='3'/>"
2486                      "</doc>";
2487   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2488                          {XCS("b"), XCS("2")},
2489                          {XCS("id"), XCS("one")},
2490                          {NULL, NULL}};
2491   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2492   ElementInfo info[] = {{XCS("doc"), 3, 0, XCS("id"), doc_info},
2493                         {XCS("tag"), 1, 0, NULL, tag_info},
2494                         {NULL, 0, 0, NULL, NULL}};
2495 
2496   XML_Parser parser = XML_ParserCreate(NULL);
2497   assert_true(parser != NULL);
2498   ParserAndElementInfo parserAndElementInfos = {
2499       parser,
2500       info,
2501   };
2502 
2503   XML_SetStartElementHandler(parser, counting_start_element_handler);
2504   XML_SetUserData(parser, &parserAndElementInfos);
2505   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2506       == XML_STATUS_ERROR)
2507     xml_failure(parser);
2508 
2509   XML_ParserFree(parser);
2510 }
2511 END_TEST
2512 
2513 START_TEST(test_duplicate_cdata_attribute) {
2514   /*
2515   https://www.w3.org/TR/xml/#attdecls
2516 
2517   Test the following statement from the linked specification:
2518     When more than one definition is provided for the same attribute of a given
2519     element type, the first declaration is binding and later declarations are
2520     ignored.
2521   */
2522 
2523   const char *text
2524       = "<!DOCTYPE doc [\n"
2525         "  <!ATTLIST doc attribute CDATA 'expected' attribute CDATA 'ignored'>\n"
2526         "]>\n"
2527         "<doc/>\n";
2528   AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
2529   ElementInfo info[]
2530       = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2531 
2532   XML_Parser parser = XML_ParserCreate(NULL);
2533   assert_true(parser != NULL);
2534 
2535   ParserAndElementInfo parserAndElementInfos = {
2536       parser,
2537       info,
2538   };
2539 
2540   XML_SetStartElementHandler(parser, counting_start_element_handler);
2541   XML_SetUserData(parser, &parserAndElementInfos);
2542 
2543   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2544       != XML_STATUS_OK)
2545     xml_failure(parser);
2546 
2547   XML_ParserFree(parser);
2548 }
2549 END_TEST
2550 
2551 START_TEST(test_duplicate_id_attribute_1) {
2552   /*
2553   https://www.w3.org/TR/xml/#attdecls
2554 
2555   Test the following statement from the linked specification:
2556     When more than one definition is provided for the same attribute of a given
2557     element type, the first declaration is binding and later declarations are
2558     ignored.
2559   */
2560 
2561   const char *text
2562       = "<!DOCTYPE doc [\n"
2563         "  <!ATTLIST doc identifier CDATA 'expected' identifier ID #REQUIRED>\n"
2564         "]>\n"
2565         "<doc/>\n";
2566   AttrInfo doc_info[] = {{XCS("identifier"), XCS("expected")}, {NULL, NULL}};
2567   ElementInfo info[]
2568       = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2569 
2570   XML_Parser parser = XML_ParserCreate(NULL);
2571   assert_true(parser != NULL);
2572 
2573   ParserAndElementInfo parserAndElementInfos = {
2574       parser,
2575       info,
2576   };
2577 
2578   XML_SetStartElementHandler(parser, counting_start_element_handler);
2579   XML_SetUserData(parser, &parserAndElementInfos);
2580 
2581   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2582       != XML_STATUS_OK)
2583     xml_failure(parser);
2584 
2585   XML_ParserFree(parser);
2586 }
2587 END_TEST
2588 
2589 START_TEST(test_duplicate_id_attribute_2) {
2590   /*
2591   https://www.w3.org/TR/xml/#attdecls
2592 
2593   Test the following statement from the linked specification:
2594     When more than one definition is provided for the same attribute of a given
2595     element type, the first declaration is binding and later declarations are
2596     ignored.
2597   */
2598 
2599   const char *text
2600       = "<!DOCTYPE doc [\n"
2601         "  <!ATTLIST doc identifier ID #REQUIRED identifier CDATA 'unexpected'>\n"
2602         "]>\n"
2603         "<doc/>\n";
2604   AttrInfo doc_info[] = {{NULL, NULL}};
2605 
2606   ElementInfo info[]
2607       = {{XCS("doc"), 0, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2608 
2609   XML_Parser parser = XML_ParserCreate(NULL);
2610   assert_true(parser != NULL);
2611 
2612   ParserAndElementInfo parserAndElementInfos = {
2613       parser,
2614       info,
2615   };
2616 
2617   XML_SetStartElementHandler(parser, counting_start_element_handler);
2618   XML_SetUserData(parser, &parserAndElementInfos);
2619 
2620   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2621       != XML_STATUS_OK)
2622     xml_failure(parser);
2623 
2624   XML_ParserFree(parser);
2625 }
2626 END_TEST
2627 
2628 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl) {
2629   /*
2630   https://www.w3.org/TR/xml/#attdecls
2631 
2632   Test the following statement from the linked specification:
2633     When more than one AttlistDecl is provided for a given element type,
2634     the contents of all those provided are merged.
2635   */
2636   const char *text = "<!DOCTYPE doc [\n"
2637                      "  <!ATTLIST doc attribute CDATA 'expected'>\n"
2638                      "  <!ATTLIST doc attribute CDATA 'ignored'>\n"
2639                      "]>\n"
2640                      "<doc/>\n";
2641   AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
2642   ElementInfo info[]
2643       = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2644 
2645   XML_Parser parser = XML_ParserCreate(NULL);
2646   assert_true(parser != NULL);
2647 
2648   ParserAndElementInfo parserAndElementInfos = {
2649       parser,
2650       info,
2651   };
2652 
2653   XML_SetStartElementHandler(parser, counting_start_element_handler);
2654   XML_SetUserData(parser, &parserAndElementInfos);
2655 
2656   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2657       != XML_STATUS_OK)
2658     xml_failure(parser);
2659 
2660   XML_ParserFree(parser);
2661 }
2662 END_TEST
2663 
2664 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2) {
2665   /*
2666   https://www.w3.org/TR/xml/#attdecls
2667 
2668   Test the following statement from the linked specification:
2669     When more than one AttlistDecl is provided for a given element type,
2670     the contents of all those provided are merged.
2671   */
2672   const char *text = "<!DOCTYPE doc [\n"
2673                      "  <!ATTLIST doc attribute CDATA 'expected_doc'>\n"
2674                      "  <!ATTLIST tag attribute CDATA 'expected_tag'>\n"
2675                      "  <!ATTLIST doc attribute CDATA 'ignored_doc'>\n"
2676                      "]>\n"
2677                      "<doc><tag></tag></doc>\n";
2678   AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")}, {NULL, NULL}};
2679   AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}};
2680   ElementInfo info[] = {{XCS("doc"), 0, 1, NULL, doc_info},
2681                         {XCS("tag"), 0, 1, NULL, tag_info},
2682                         {NULL, 0, 0, NULL, NULL}};
2683 
2684   XML_Parser parser = XML_ParserCreate(NULL);
2685   assert_true(parser != NULL);
2686 
2687   ParserAndElementInfo parserAndElementInfos = {
2688       parser,
2689       info,
2690   };
2691 
2692   XML_SetStartElementHandler(parser, counting_start_element_handler);
2693   XML_SetUserData(parser, &parserAndElementInfos);
2694 
2695   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2696       != XML_STATUS_OK)
2697     xml_failure(parser);
2698 
2699   XML_ParserFree(parser);
2700 }
2701 END_TEST
2702 
2703 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3) {
2704   /*
2705   https://www.w3.org/TR/xml/#attdecls
2706 
2707   Test the following statement from the linked specification:
2708     When more than one AttlistDecl is provided for a given element type,
2709     the contents of all those provided are merged.
2710   */
2711   const char *text
2712       = "<!DOCTYPE doc [\n"
2713         "  <!ATTLIST doc attribute CDATA 'expected_doc'>\n"
2714         "  <!ATTLIST tag attribute CDATA 'expected_tag'>\n"
2715         "  <!ATTLIST doc second_attribute CDATA 'second_expected_doc' attribute CDATA 'ignored_doc'>\n"
2716         "]>\n"
2717         "<doc><tag></tag></doc>\n";
2718   AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")},
2719                          {XCS("second_attribute"), XCS("second_expected_doc")},
2720                          {NULL, NULL}};
2721   AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}};
2722   ElementInfo info[] = {{XCS("doc"), 0, 2, NULL, doc_info},
2723                         {XCS("tag"), 0, 1, NULL, tag_info},
2724                         {NULL, 0, 0, NULL, NULL}};
2725 
2726   XML_Parser parser = XML_ParserCreate(NULL);
2727   assert_true(parser != NULL);
2728 
2729   ParserAndElementInfo parserAndElementInfos = {
2730       parser,
2731       info,
2732   };
2733 
2734   XML_SetStartElementHandler(parser, counting_start_element_handler);
2735   XML_SetUserData(parser, &parserAndElementInfos);
2736 
2737   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2738       != XML_STATUS_OK)
2739     xml_failure(parser);
2740 
2741   XML_ParserFree(parser);
2742 }
2743 END_TEST
2744 
2745 START_TEST(test_duplicate_id_attribute_multiple_attlistdecl) {
2746   /*
2747   https://www.w3.org/TR/xml/#attdecls
2748 
2749   Test the following statement from the linked specification:
2750     When more than one AttlistDecl is provided for a given element type,
2751     the contents of all those provided are merged.
2752   */
2753   const char *text = "<!DOCTYPE doc [\n"
2754                      "  <!ATTLIST doc identifier ID #REQUIRED>\n"
2755                      "  <!ATTLIST tag identifier CDATA 'identifier_tag'>\n"
2756                      "  <!ATTLIST doc identifier CDATA 'ignored'>\n"
2757                      "]>\n"
2758                      "<doc identifier='doc_identity'><tag></tag></doc>\n";
2759   AttrInfo doc_info[]
2760       = {{XCS("identifier"), XCS("doc_identity")}, {NULL, NULL}};
2761   AttrInfo tag_info[]
2762       = {{XCS("identifier"), XCS("identifier_tag")}, {NULL, NULL}};
2763   ElementInfo info[] = {{XCS("doc"), 1, 0, XCS("identifier"), doc_info},
2764                         {XCS("tag"), 0, 1, NULL, tag_info},
2765                         {NULL, 0, 0, NULL, NULL}};
2766 
2767   XML_Parser parser = XML_ParserCreate(NULL);
2768   assert_true(parser != NULL);
2769 
2770   ParserAndElementInfo parserAndElementInfos = {
2771       parser,
2772       info,
2773   };
2774 
2775   XML_SetStartElementHandler(parser, counting_start_element_handler);
2776   XML_SetUserData(parser, &parserAndElementInfos);
2777 
2778   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2779       != XML_STATUS_OK)
2780     xml_failure(parser);
2781 
2782   XML_ParserFree(parser);
2783 }
2784 END_TEST
2785 
2786 /* Test reset works correctly in the middle of processing an internal
2787  * entity.  Exercises some obscure code in XML_ParserReset().
2788  */
2789 START_TEST(test_reset_in_entity) {
2790   if (g_chunkSize != 0) {
2791     // this test does not use SINGLE_BYTES, because of suspension
2792     return;
2793   }
2794 
2795   const char *text = "<!DOCTYPE doc [\n"
2796                      "<!ENTITY wombat 'wom'>\n"
2797                      "<!ENTITY entity 'hi &wom; there'>\n"
2798                      "]>\n"
2799                      "<doc>&entity;</doc>";
2800   XML_ParsingStatus status;
2801 
2802   g_resumable = XML_TRUE;
2803   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2804   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
2805   // we won't know exactly how much input we actually managed to give Expat.
2806   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2807       == XML_STATUS_ERROR)
2808     xml_failure(g_parser);
2809   XML_GetParsingStatus(g_parser, &status);
2810   if (status.parsing != XML_SUSPENDED)
2811     fail("Parsing status not SUSPENDED");
2812   XML_ParserReset(g_parser, NULL);
2813   XML_GetParsingStatus(g_parser, &status);
2814   if (status.parsing != XML_INITIALIZED)
2815     fail("Parsing status doesn't reset to INITIALIZED");
2816 }
2817 END_TEST
2818 
2819 /* Test that resume correctly passes through parse errors */
2820 START_TEST(test_resume_invalid_parse) {
2821   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2822 
2823   g_resumable = XML_TRUE;
2824   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2825   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2826       == XML_STATUS_ERROR)
2827     xml_failure(g_parser);
2828   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2829     fail("Resumed invalid parse not faulted");
2830   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2831     fail("Invalid parse not correctly faulted");
2832 }
2833 END_TEST
2834 
2835 /* Test that re-suspended parses are correctly passed through */
2836 START_TEST(test_resume_resuspended) {
2837   const char *text = "<doc>Hello<meep/>world</doc>";
2838 
2839   g_resumable = XML_TRUE;
2840   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2841   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2842       == XML_STATUS_ERROR)
2843     xml_failure(g_parser);
2844   g_resumable = XML_TRUE;
2845   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2846   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2847     fail("Resumption not suspended");
2848   /* This one should succeed and finish up */
2849   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2850     xml_failure(g_parser);
2851 }
2852 END_TEST
2853 
2854 /* Test that CDATA shows up correctly through a default handler */
2855 START_TEST(test_cdata_default) {
2856   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2857   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2858   CharData storage;
2859 
2860   CharData_Init(&storage);
2861   XML_SetUserData(g_parser, &storage);
2862   XML_SetDefaultHandler(g_parser, accumulate_characters);
2863 
2864   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2865       == XML_STATUS_ERROR)
2866     xml_failure(g_parser);
2867   CharData_CheckXMLChars(&storage, expected);
2868 }
2869 END_TEST
2870 
2871 /* Test resetting a subordinate parser does exactly nothing */
2872 START_TEST(test_subordinate_reset) {
2873   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2874                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2875                      "<doc>&entity;</doc>";
2876 
2877   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2878   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2879   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2880       == XML_STATUS_ERROR)
2881     xml_failure(g_parser);
2882 }
2883 END_TEST
2884 
2885 /* Test suspending a subordinate parser */
2886 START_TEST(test_subordinate_suspend) {
2887   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2888                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2889                      "<doc>&entity;</doc>";
2890 
2891   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2892   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2893   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2894       == XML_STATUS_ERROR)
2895     xml_failure(g_parser);
2896 }
2897 END_TEST
2898 
2899 /* Test suspending a subordinate parser from an XML declaration */
2900 /* Increases code coverage of the tests */
2901 
2902 START_TEST(test_subordinate_xdecl_suspend) {
2903   const char *text
2904       = "<!DOCTYPE doc [\n"
2905         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2906         "]>\n"
2907         "<doc>&entity;</doc>";
2908 
2909   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2910   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2911   g_resumable = XML_TRUE;
2912   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2913       == XML_STATUS_ERROR)
2914     xml_failure(g_parser);
2915 }
2916 END_TEST
2917 
2918 START_TEST(test_subordinate_xdecl_abort) {
2919   const char *text
2920       = "<!DOCTYPE doc [\n"
2921         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2922         "]>\n"
2923         "<doc>&entity;</doc>";
2924 
2925   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2926   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2927   g_resumable = XML_FALSE;
2928   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2929       == XML_STATUS_ERROR)
2930     xml_failure(g_parser);
2931 }
2932 END_TEST
2933 
2934 /* Test external entity fault handling with suspension */
2935 START_TEST(test_ext_entity_invalid_suspended_parse) {
2936   const char *text = "<!DOCTYPE doc [\n"
2937                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2938                      "]>\n"
2939                      "<doc>&en;</doc>";
2940   ExtFaults faults[]
2941       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2942           "Incomplete element declaration not faulted", NULL,
2943           XML_ERROR_UNCLOSED_TOKEN},
2944          {/* First two bytes of a three-byte char */
2945           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2946           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2947          {NULL, NULL, NULL, XML_ERROR_NONE}};
2948   ExtFaults *fault;
2949 
2950   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2951     set_subtest("%s", fault->parse_text);
2952     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2953     XML_SetExternalEntityRefHandler(g_parser,
2954                                     external_entity_suspending_faulter);
2955     XML_SetUserData(g_parser, fault);
2956     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2957                    "Parser did not report external entity error");
2958     XML_ParserReset(g_parser, NULL);
2959   }
2960 }
2961 END_TEST
2962 
2963 /* Test setting an explicit encoding */
2964 START_TEST(test_explicit_encoding) {
2965   const char *text1 = "<doc>Hello ";
2966   const char *text2 = " World</doc>";
2967 
2968   /* Just check that we can set the encoding to NULL before starting */
2969   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2970     fail("Failed to initialise encoding to NULL");
2971   /* Say we are UTF-8 */
2972   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2973     fail("Failed to set explicit encoding");
2974   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2975       == XML_STATUS_ERROR)
2976     xml_failure(g_parser);
2977   /* Try to switch encodings mid-parse */
2978   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2979     fail("Allowed encoding change");
2980   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2981       == XML_STATUS_ERROR)
2982     xml_failure(g_parser);
2983   /* Try now the parse is over */
2984   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2985     fail("Failed to unset encoding");
2986 }
2987 END_TEST
2988 
2989 /* Test handling of trailing CR (rather than newline) */
2990 START_TEST(test_trailing_cr) {
2991   const char *text = "<doc>\r";
2992   int found_cr;
2993 
2994   /* Try with a character handler, for code coverage */
2995   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2996   XML_SetUserData(g_parser, &found_cr);
2997   found_cr = 0;
2998   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2999       == XML_STATUS_OK)
3000     fail("Failed to fault unclosed doc");
3001   if (found_cr == 0)
3002     fail("Did not catch the carriage return");
3003   XML_ParserReset(g_parser, NULL);
3004 
3005   /* Now with a default handler instead */
3006   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
3007   XML_SetUserData(g_parser, &found_cr);
3008   found_cr = 0;
3009   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3010       == XML_STATUS_OK)
3011     fail("Failed to fault unclosed doc");
3012   if (found_cr == 0)
3013     fail("Did not catch default carriage return");
3014 }
3015 END_TEST
3016 
3017 /* Test trailing CR in an external entity parse */
3018 START_TEST(test_ext_entity_trailing_cr) {
3019   const char *text = "<!DOCTYPE doc [\n"
3020                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3021                      "]>\n"
3022                      "<doc>&en;</doc>";
3023   int found_cr;
3024 
3025   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3026   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
3027   XML_SetUserData(g_parser, &found_cr);
3028   found_cr = 0;
3029   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3030       != XML_STATUS_OK)
3031     xml_failure(g_parser);
3032   if (found_cr == 0)
3033     fail("No carriage return found");
3034   XML_ParserReset(g_parser, NULL);
3035 
3036   /* Try again with a different trailing CR */
3037   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3038   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
3039   XML_SetUserData(g_parser, &found_cr);
3040   found_cr = 0;
3041   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3042       != XML_STATUS_OK)
3043     xml_failure(g_parser);
3044   if (found_cr == 0)
3045     fail("No carriage return found");
3046 }
3047 END_TEST
3048 
3049 /* Test handling of trailing square bracket */
3050 START_TEST(test_trailing_rsqb) {
3051   const char *text8 = "<doc>]";
3052   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
3053   int found_rsqb;
3054   int text8_len = (int)strlen(text8);
3055 
3056   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
3057   XML_SetUserData(g_parser, &found_rsqb);
3058   found_rsqb = 0;
3059   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
3060       == XML_STATUS_OK)
3061     fail("Failed to fault unclosed doc");
3062   if (found_rsqb == 0)
3063     fail("Did not catch the right square bracket");
3064 
3065   /* Try again with a different encoding */
3066   XML_ParserReset(g_parser, NULL);
3067   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
3068   XML_SetUserData(g_parser, &found_rsqb);
3069   found_rsqb = 0;
3070   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
3071                               XML_TRUE)
3072       == XML_STATUS_OK)
3073     fail("Failed to fault unclosed doc");
3074   if (found_rsqb == 0)
3075     fail("Did not catch the right square bracket");
3076 
3077   /* And finally with a default handler */
3078   XML_ParserReset(g_parser, NULL);
3079   XML_SetDefaultHandler(g_parser, rsqb_handler);
3080   XML_SetUserData(g_parser, &found_rsqb);
3081   found_rsqb = 0;
3082   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
3083                               XML_TRUE)
3084       == XML_STATUS_OK)
3085     fail("Failed to fault unclosed doc");
3086   if (found_rsqb == 0)
3087     fail("Did not catch the right square bracket");
3088 }
3089 END_TEST
3090 
3091 /* Test trailing right square bracket in an external entity parse */
3092 START_TEST(test_ext_entity_trailing_rsqb) {
3093   const char *text = "<!DOCTYPE doc [\n"
3094                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3095                      "]>\n"
3096                      "<doc>&en;</doc>";
3097   int found_rsqb;
3098 
3099   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3100   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
3101   XML_SetUserData(g_parser, &found_rsqb);
3102   found_rsqb = 0;
3103   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3104       != XML_STATUS_OK)
3105     xml_failure(g_parser);
3106   if (found_rsqb == 0)
3107     fail("No right square bracket found");
3108 }
3109 END_TEST
3110 
3111 /* Test CDATA handling in an external entity */
3112 START_TEST(test_ext_entity_good_cdata) {
3113   const char *text = "<!DOCTYPE doc [\n"
3114                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3115                      "]>\n"
3116                      "<doc>&en;</doc>";
3117 
3118   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3119   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
3120   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3121       != XML_STATUS_OK)
3122     xml_failure(g_parser);
3123 }
3124 END_TEST
3125 
3126 /* Test user parameter settings */
3127 START_TEST(test_user_parameters) {
3128   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
3129                      "<!-- Primary parse -->\n"
3130                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
3131                      "<doc>&entity;";
3132   const char *epilog = "<!-- Back to primary parser -->\n"
3133                        "</doc>";
3134 
3135   g_comment_count = 0;
3136   g_skip_count = 0;
3137   g_xdecl_count = 0;
3138   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3139   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
3140   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
3141   XML_SetCommentHandler(g_parser, data_check_comment_handler);
3142   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
3143   XML_UseParserAsHandlerArg(g_parser);
3144   XML_SetUserData(g_parser, (void *)1);
3145   g_handler_data = g_parser;
3146   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3147       == XML_STATUS_ERROR)
3148     xml_failure(g_parser);
3149   /* Ensure we can't change policy mid-parse */
3150   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
3151     fail("Changed param entity parsing policy while parsing");
3152   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
3153       == XML_STATUS_ERROR)
3154     xml_failure(g_parser);
3155   if (g_comment_count != 3)
3156     fail("Comment handler not invoked enough times");
3157   if (g_skip_count != 1)
3158     fail("Skip handler not invoked enough times");
3159   if (g_xdecl_count != 1)
3160     fail("XML declaration handler not invoked");
3161 }
3162 END_TEST
3163 
3164 /* Test that an explicit external entity handler argument replaces
3165  * the parser as the first argument.
3166  *
3167  * We do not call the first parameter to the external entity handler
3168  * 'parser' for once, since the first time the handler is called it
3169  * will actually be a text string.  We need to be able to access the
3170  * global 'parser' variable to create our external entity parser from,
3171  * since there are code paths we need to ensure get executed.
3172  */
3173 START_TEST(test_ext_entity_ref_parameter) {
3174   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
3175                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
3176                      "<doc>&entity;</doc>";
3177 
3178   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3179   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
3180   /* Set a handler arg that is not NULL and not parser (which is
3181    * what NULL would cause to be passed.
3182    */
3183   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
3184   g_handler_data = text;
3185   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3186       == XML_STATUS_ERROR)
3187     xml_failure(g_parser);
3188 
3189   /* Now try again with unset args */
3190   XML_ParserReset(g_parser, NULL);
3191   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3192   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
3193   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
3194   g_handler_data = g_parser;
3195   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3196       == XML_STATUS_ERROR)
3197     xml_failure(g_parser);
3198 }
3199 END_TEST
3200 
3201 /* Test the parsing of an empty string */
3202 START_TEST(test_empty_parse) {
3203   const char *text = "<doc></doc>";
3204   const char *partial = "<doc>";
3205 
3206   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
3207     fail("Parsing empty string faulted");
3208   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3209     fail("Parsing final empty string not faulted");
3210   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
3211     fail("Parsing final empty string faulted for wrong reason");
3212 
3213   /* Now try with valid text before the empty end */
3214   XML_ParserReset(g_parser, NULL);
3215   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3216       == XML_STATUS_ERROR)
3217     xml_failure(g_parser);
3218   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
3219     fail("Parsing final empty string faulted");
3220 
3221   /* Now try with invalid text before the empty end */
3222   XML_ParserReset(g_parser, NULL);
3223   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
3224                               XML_FALSE)
3225       == XML_STATUS_ERROR)
3226     xml_failure(g_parser);
3227   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3228     fail("Parsing final incomplete empty string not faulted");
3229 }
3230 END_TEST
3231 
3232 /* Test XML_Parse for len < 0 */
3233 START_TEST(test_negative_len_parse) {
3234   const char *const doc = "<root/>";
3235   for (int isFinal = 0; isFinal < 2; isFinal++) {
3236     set_subtest("isFinal=%d", isFinal);
3237 
3238     XML_Parser parser = XML_ParserCreate(NULL);
3239 
3240     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
3241       fail("There was not supposed to be any initial parse error.");
3242 
3243     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
3244 
3245     if (status != XML_STATUS_ERROR)
3246       fail("Negative len was expected to fail the parse but did not.");
3247 
3248     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3249       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3250 
3251     XML_ParserFree(parser);
3252   }
3253 }
3254 END_TEST
3255 
3256 /* Test XML_ParseBuffer for len < 0 */
3257 START_TEST(test_negative_len_parse_buffer) {
3258   const char *const doc = "<root/>";
3259   for (int isFinal = 0; isFinal < 2; isFinal++) {
3260     set_subtest("isFinal=%d", isFinal);
3261 
3262     XML_Parser parser = XML_ParserCreate(NULL);
3263 
3264     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
3265       fail("There was not supposed to be any initial parse error.");
3266 
3267     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
3268 
3269     if (buffer == NULL)
3270       fail("XML_GetBuffer failed.");
3271 
3272     memcpy(buffer, doc, strlen(doc));
3273 
3274     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
3275 
3276     if (status != XML_STATUS_ERROR)
3277       fail("Negative len was expected to fail the parse but did not.");
3278 
3279     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3280       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3281 
3282     XML_ParserFree(parser);
3283   }
3284 }
3285 END_TEST
3286 
3287 /* Test odd corners of the XML_GetBuffer interface */
3288 static enum XML_Status
3289 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
3290   const XML_Feature *feature = XML_GetFeatureList();
3291 
3292   if (feature == NULL)
3293     return XML_STATUS_ERROR;
3294   for (; feature->feature != XML_FEATURE_END; feature++) {
3295     if (feature->feature == feature_id) {
3296       *presult = feature->value;
3297       return XML_STATUS_OK;
3298     }
3299   }
3300   return XML_STATUS_ERROR;
3301 }
3302 
3303 /* Test odd corners of the XML_GetBuffer interface */
3304 START_TEST(test_get_buffer_1) {
3305   const char *text = get_buffer_test_text;
3306   long context_bytes;
3307 
3308   /* Attempt to allocate a negative length buffer */
3309   if (XML_GetBuffer(g_parser, -12) != NULL)
3310     fail("Negative length buffer not failed");
3311 
3312   /* Now get a small buffer and extend it past valid length */
3313   void *const buffer = XML_GetBuffer(g_parser, 1536);
3314   if (buffer == NULL)
3315     fail("1.5K buffer failed");
3316   assert(buffer != NULL);
3317   memcpy(buffer, text, strlen(text));
3318   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3319       == XML_STATUS_ERROR)
3320     xml_failure(g_parser);
3321   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
3322     fail("INT_MAX buffer not failed");
3323 
3324   /* Now try extending it a more reasonable but still too large
3325    * amount.  The allocator in XML_GetBuffer() doubles the buffer
3326    * size until it exceeds the requested amount or INT_MAX.  If it
3327    * exceeds INT_MAX, it rejects the request, so we want a request
3328    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
3329    * with an extra byte just to ensure that the request is off any
3330    * boundary.  The request will be inflated internally by
3331    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
3332    * request.
3333    */
3334   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
3335     context_bytes = 0;
3336   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
3337     fail("INT_MAX- buffer not failed");
3338 
3339   /* Now try extending it a carefully crafted amount */
3340   if (XML_GetBuffer(g_parser, 1000) == NULL)
3341     fail("1000 buffer failed");
3342 }
3343 END_TEST
3344 
3345 /* Test more corners of the XML_GetBuffer interface */
3346 START_TEST(test_get_buffer_2) {
3347   const char *text = get_buffer_test_text;
3348 
3349   /* Now get a decent buffer */
3350   void *const buffer = XML_GetBuffer(g_parser, 1536);
3351   if (buffer == NULL)
3352     fail("1.5K buffer failed");
3353   assert(buffer != NULL);
3354   memcpy(buffer, text, strlen(text));
3355   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3356       == XML_STATUS_ERROR)
3357     xml_failure(g_parser);
3358 
3359   /* Extend it, to catch a different code path */
3360   if (XML_GetBuffer(g_parser, 1024) == NULL)
3361     fail("1024 buffer failed");
3362 }
3363 END_TEST
3364 
/* Test for signed integer overflow CVE-2022-23852.
 *
 * Only built when XML_CONTEXT_BYTES > 0.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const text = "\n";
  const int text_len = (int)strlen(text);
  const int expectedKeepValue = text_len;

  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  // After this call, variable "keep" in XML_GetBuffer will
  // have value expectedKeepValue
  const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
      parser, text, text_len, XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR)
    xml_failure(parser);

  // The request below is sized using expectedKeepValue and must be
  // rejected by XML_GetBuffer
  assert(expectedKeepValue > 0);
  if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
3389 
3390 START_TEST(test_buffer_can_grow_to_max) {
3391   const char *const prefixes[] = {
3392       "",
3393       "<",
3394       "<x a='",
3395       "<doc><x a='",
3396       "<document><x a='",
3397       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
3398       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
3399       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
3400       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
3401       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
3402   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
3403   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
3404 #if defined(__MINGW32__) && ! defined(__MINGW64__)
3405   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
3406   // Can we make a big allocation?
3407   for (int i = 1; i <= 2; i++) {
3408     void *const big = malloc(maxbuf);
3409     if (big != NULL) {
3410       free(big);
3411       break;
3412     }
3413     // The big allocation failed. Let's be a little lenient.
3414     maxbuf = maxbuf / 2;
3415     fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf);
3416   }
3417 #endif
3418 
3419   for (int i = 0; i < num_prefixes; ++i) {
3420     set_subtest("\"%s\"", prefixes[i]);
3421     XML_Parser parser = XML_ParserCreate(NULL);
3422 #if XML_GE == 1
3423     assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
3424                 == XML_TRUE); // i.e. deactivate
3425 #endif
3426     const int prefix_len = (int)strlen(prefixes[i]);
3427     const enum XML_Status s
3428         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3429     if (s != XML_STATUS_OK)
3430       xml_failure(parser);
3431 
3432     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3433     // subtracting the whole prefix is easiest, and close enough.
3434     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3435     // The limit should be consistent; no prefix should allow us to
3436     // reach above the max buffer size.
3437     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3438     XML_ParserFree(parser);
3439   }
3440 }
3441 END_TEST
3442 
3443 START_TEST(test_getbuffer_allocates_on_zero_len) {
3444   for (int first_len = 1; first_len >= 0; first_len--) {
3445     set_subtest("with len=%d first", first_len);
3446     XML_Parser parser = XML_ParserCreate(NULL);
3447     assert_true(parser != NULL);
3448     assert_true(XML_GetBuffer(parser, first_len) != NULL);
3449     assert_true(XML_GetBuffer(parser, 0) != NULL);
3450     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3451       xml_failure(parser);
3452     XML_ParserFree(parser);
3453   }
3454 }
3455 END_TEST
3456 
3457 /* Test position information macros */
3458 START_TEST(test_byte_info_at_end) {
3459   const char *text = "<doc></doc>";
3460 
3461   if (XML_GetCurrentByteIndex(g_parser) != -1
3462       || XML_GetCurrentByteCount(g_parser) != 0)
3463     fail("Byte index/count incorrect at start of parse");
3464   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3465       == XML_STATUS_ERROR)
3466     xml_failure(g_parser);
3467   /* At end, the count will be zero and the index the end of string */
3468   if (XML_GetCurrentByteCount(g_parser) != 0)
3469     fail("Terminal byte count incorrect");
3470   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3471     fail("Terminal byte index incorrect");
3472 }
3473 END_TEST
3474 
3475 /* Test position information from errors */
3476 #define PRE_ERROR_STR "<doc></"
3477 #define POST_ERROR_STR "wombat></doc>"
3478 START_TEST(test_byte_info_at_error) {
3479   const char *text = PRE_ERROR_STR POST_ERROR_STR;
3480 
3481   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3482       == XML_STATUS_OK)
3483     fail("Syntax error not faulted");
3484   if (XML_GetCurrentByteCount(g_parser) != 0)
3485     fail("Error byte count incorrect");
3486   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3487     fail("Error byte index incorrect");
3488 }
3489 END_TEST
3490 #undef PRE_ERROR_STR
3491 #undef POST_ERROR_STR
3492 
3493 /* Test position information in handler */
3494 #define START_ELEMENT "<e>"
3495 #define CDATA_TEXT "Hello"
3496 #define END_ELEMENT "</e>"
3497 START_TEST(test_byte_info_at_cdata) {
3498   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3499   int offset, size;
3500   ByteTestData data;
3501 
3502   /* Check initial context is empty */
3503   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3504     fail("Unexpected context at start of parse");
3505 
3506   data.start_element_len = (int)strlen(START_ELEMENT);
3507   data.cdata_len = (int)strlen(CDATA_TEXT);
3508   data.total_string_len = (int)strlen(text);
3509   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3510   XML_SetUserData(g_parser, &data);
3511   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3512     xml_failure(g_parser);
3513 }
3514 END_TEST
3515 #undef START_ELEMENT
3516 #undef CDATA_TEXT
3517 #undef END_ELEMENT
3518 
3519 /* Test predefined entities are correctly recognised */
3520 START_TEST(test_predefined_entities) {
3521   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3522   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3523   const XML_Char *result = XCS("<>&\"'");
3524   CharData storage;
3525 
3526   XML_SetDefaultHandler(g_parser, accumulate_characters);
3527   /* run_character_check uses XML_SetCharacterDataHandler(), which
3528    * unfortunately heads off a code path that we need to exercise.
3529    */
3530   CharData_Init(&storage);
3531   XML_SetUserData(g_parser, &storage);
3532   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3533       == XML_STATUS_ERROR)
3534     xml_failure(g_parser);
3535   /* The default handler doesn't translate the entities */
3536   CharData_CheckXMLChars(&storage, expected);
3537 
3538   /* Now try again and check the translation */
3539   XML_ParserReset(g_parser, NULL);
3540   run_character_check(text, result);
3541 }
3542 END_TEST
3543 
3544 /* Regression test that an invalid tag in an external parameter
3545  * reference in an external DTD is correctly faulted.
3546  *
3547  * Only a few specific tags are legal in DTDs ignoring comments and
3548  * processing instructions, all of which begin with an exclamation
3549  * mark.  "<el/>" is not one of them, so the parser should raise an
3550  * error on encountering it.
3551  */
3552 START_TEST(test_invalid_tag_in_dtd) {
3553   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3554                      "<doc></doc>\n";
3555 
3556   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3557   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3558   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3559                  "Invalid tag IN DTD external param not rejected");
3560 }
3561 END_TEST
3562 
3563 /* Test entities not quite the predefined ones are not mis-recognised */
3564 START_TEST(test_not_predefined_entities) {
3565   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3566                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3567   int i = 0;
3568 
3569   while (text[i] != NULL) {
3570     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3571                    "Undefined entity not rejected");
3572     XML_ParserReset(g_parser, NULL);
3573     i++;
3574   }
3575 }
3576 END_TEST
3577 
3578 /* Test conditional inclusion (IGNORE) */
3579 START_TEST(test_ignore_section) {
3580   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3581                      "<doc><e>&entity;</e></doc>";
3582   const XML_Char *expected
3583       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3584   CharData storage;
3585 
3586   CharData_Init(&storage);
3587   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3588   XML_SetUserData(g_parser, &storage);
3589   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3590   XML_SetDefaultHandler(g_parser, accumulate_characters);
3591   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3592   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3593   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3594   XML_SetStartElementHandler(g_parser, dummy_start_element);
3595   XML_SetEndElementHandler(g_parser, dummy_end_element);
3596   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3597       == XML_STATUS_ERROR)
3598     xml_failure(g_parser);
3599   CharData_CheckXMLChars(&storage, expected);
3600 }
3601 END_TEST
3602 
3603 START_TEST(test_ignore_section_utf16) {
3604   const char text[] =
3605       /* <!DOCTYPE d SYSTEM 's'> */
3606       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3607       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3608       /* <d><e>&en;</e></d> */
3609       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3610   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3611   CharData storage;
3612 
3613   CharData_Init(&storage);
3614   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3615   XML_SetUserData(g_parser, &storage);
3616   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3617   XML_SetDefaultHandler(g_parser, accumulate_characters);
3618   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3619   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3620   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3621   XML_SetStartElementHandler(g_parser, dummy_start_element);
3622   XML_SetEndElementHandler(g_parser, dummy_end_element);
3623   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3624       == XML_STATUS_ERROR)
3625     xml_failure(g_parser);
3626   CharData_CheckXMLChars(&storage, expected);
3627 }
3628 END_TEST
3629 
3630 START_TEST(test_ignore_section_utf16_be) {
3631   const char text[] =
3632       /* <!DOCTYPE d SYSTEM 's'> */
3633       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3634       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3635       /* <d><e>&en;</e></d> */
3636       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3637   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3638   CharData storage;
3639 
3640   CharData_Init(&storage);
3641   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3642   XML_SetUserData(g_parser, &storage);
3643   XML_SetExternalEntityRefHandler(g_parser,
3644                                   external_entity_load_ignore_utf16_be);
3645   XML_SetDefaultHandler(g_parser, accumulate_characters);
3646   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3647   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3648   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3649   XML_SetStartElementHandler(g_parser, dummy_start_element);
3650   XML_SetEndElementHandler(g_parser, dummy_end_element);
3651   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3652       == XML_STATUS_ERROR)
3653     xml_failure(g_parser);
3654   CharData_CheckXMLChars(&storage, expected);
3655 }
3656 END_TEST
3657 
3658 /* Test mis-formatted conditional exclusion */
3659 START_TEST(test_bad_ignore_section) {
3660   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3661                      "<doc><e>&entity;</e></doc>";
3662   ExtFaults faults[]
3663       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3664           XML_ERROR_SYNTAX},
3665          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3666           XML_ERROR_INVALID_TOKEN},
3667          {/* FIrst two bytes of a three-byte char */
3668           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3669           XML_ERROR_PARTIAL_CHAR},
3670          {NULL, NULL, NULL, XML_ERROR_NONE}};
3671   ExtFaults *fault;
3672 
3673   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3674     set_subtest("%s", fault->parse_text);
3675     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3676     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3677     XML_SetUserData(g_parser, fault);
3678     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3679                    "Incomplete IGNORE section not failed");
3680     XML_ParserReset(g_parser, NULL);
3681   }
3682 }
3683 END_TEST
3684 
/* User data shared between test_external_bom_consumed and its external
 * entity handler, external_bom_checker.
 */
struct bom_testdata {
  /* Text fed to the external entity parser for '004-2.ent' */
  const char *external;
  /* Number of leading bytes of 'external' fed as a first, non-final chunk */
  int split;
  /* Set to XML_TRUE once the handler has been called for '004-2.ent' */
  XML_Bool nested_callback_happened;
};
3690 
3691 static int XMLCALL
3692 external_bom_checker(XML_Parser parser, const XML_Char *context,
3693                      const XML_Char *base, const XML_Char *systemId,
3694                      const XML_Char *publicId) {
3695   const char *text;
3696   UNUSED_P(base);
3697   UNUSED_P(systemId);
3698   UNUSED_P(publicId);
3699 
3700   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3701   if (ext_parser == NULL)
3702     fail("Could not create external entity parser");
3703 
3704   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3705     struct bom_testdata *const testdata = XML_GetUserData(parser);
3706     const char *const external = testdata->external;
3707     const int split = testdata->split;
3708     testdata->nested_callback_happened = XML_TRUE;
3709 
3710     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3711         != XML_STATUS_OK) {
3712       xml_failure(ext_parser);
3713     }
3714     text = external + split; // the parse below will continue where we left off.
3715   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3716     text = "<!ELEMENT doc EMPTY>\n"
3717            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3718            "<!ENTITY % e2 '%e1;'>\n";
3719   } else {
3720     fail("unknown systemId");
3721   }
3722 
3723   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3724       != XML_STATUS_OK)
3725     xml_failure(ext_parser);
3726 
3727   XML_ParserFree(ext_parser);
3728   return XML_STATUS_OK;
3729 }
3730 
3731 /* regression test: BOM should be consumed when followed by a partial token. */
3732 START_TEST(test_external_bom_consumed) {
3733   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3734                            "<doc></doc>\n";
3735   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3736   const int len = (int)strlen(external);
3737   for (int split = 0; split <= len; ++split) {
3738     set_subtest("split at byte %d", split);
3739 
3740     struct bom_testdata testdata;
3741     testdata.external = external;
3742     testdata.split = split;
3743     testdata.nested_callback_happened = XML_FALSE;
3744 
3745     XML_Parser parser = XML_ParserCreate(NULL);
3746     if (parser == NULL) {
3747       fail("Couldn't create parser");
3748     }
3749     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3750     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3751     XML_SetUserData(parser, &testdata);
3752     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3753         == XML_STATUS_ERROR)
3754       xml_failure(parser);
3755     if (! testdata.nested_callback_happened) {
3756       fail("ref handler not called");
3757     }
3758     XML_ParserFree(parser);
3759   }
3760 }
3761 END_TEST
3762 
3763 /* Test recursive parsing */
3764 START_TEST(test_external_entity_values) {
3765   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3766                      "<doc></doc>\n";
3767   ExtFaults data_004_2[] = {
3768       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3769       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3770        XML_ERROR_INVALID_TOKEN},
3771       {"'wombat", "Unterminated string not faulted", NULL,
3772        XML_ERROR_UNCLOSED_TOKEN},
3773       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3774        XML_ERROR_PARTIAL_CHAR},
3775       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3776       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3777        XML_ERROR_XML_DECL},
3778       {/* UTF-8 BOM */
3779        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3780        XML_ERROR_NONE},
3781       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3782        "Invalid token after text declaration not faulted", NULL,
3783        XML_ERROR_INVALID_TOKEN},
3784       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3785        "Unterminated string after text decl not faulted", NULL,
3786        XML_ERROR_UNCLOSED_TOKEN},
3787       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3788        "Partial UTF-8 character after text decl not faulted", NULL,
3789        XML_ERROR_PARTIAL_CHAR},
3790       {"%e1;", "Recursive parameter entity not faulted", NULL,
3791        XML_ERROR_RECURSIVE_ENTITY_REF},
3792       {NULL, NULL, NULL, XML_ERROR_NONE}};
3793   int i;
3794 
3795   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3796     set_subtest("%s", data_004_2[i].parse_text);
3797     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3798     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3799     XML_SetUserData(g_parser, &data_004_2[i]);
3800     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3801         == XML_STATUS_ERROR)
3802       xml_failure(g_parser);
3803     XML_ParserReset(g_parser, NULL);
3804   }
3805 }
3806 END_TEST
3807 
3808 /* Test the recursive parse interacts with a not standalone handler */
3809 START_TEST(test_ext_entity_not_standalone) {
3810   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3811                      "<doc></doc>";
3812 
3813   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3814   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3815   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3816                  "Standalone rejection not caught");
3817 }
3818 END_TEST
3819 
3820 START_TEST(test_ext_entity_value_abort) {
3821   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3822                      "<doc></doc>\n";
3823 
3824   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3825   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3826   g_resumable = XML_FALSE;
3827   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3828       == XML_STATUS_ERROR)
3829     xml_failure(g_parser);
3830 }
3831 END_TEST
3832 
3833 START_TEST(test_bad_public_doctype) {
3834   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3835                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3836                      "<doc></doc>";
3837 
3838   /* Setting a handler provokes a particular code path */
3839   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3840                             dummy_end_doctype_handler);
3841   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3842 }
3843 END_TEST
3844 
3845 /* Test based on ibm/valid/P32/ibm32v04.xml */
3846 START_TEST(test_attribute_enum_value) {
3847   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3848                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3849                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3850   ExtTest dtd_data
3851       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3852          "<!ELEMENT a EMPTY>\n"
3853          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3854          NULL, NULL};
3855   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3856 
3857   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3858   XML_SetUserData(g_parser, &dtd_data);
3859   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3860   /* An attribute list handler provokes a different code path */
3861   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3862   run_ext_character_check(text, &dtd_data, expected);
3863 }
3864 END_TEST
3865 
3866 /* Slightly bizarrely, the library seems to silently ignore entity
3867  * definitions for predefined entities, even when they are wrong.  The
3868  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3869  * to happen, so this is currently treated as acceptable.
3870  */
3871 START_TEST(test_predefined_entity_redefinition) {
3872   const char *text = "<!DOCTYPE doc [\n"
3873                      "<!ENTITY apos 'foo'>\n"
3874                      "]>\n"
3875                      "<doc>&apos;</doc>";
3876   run_character_check(text, XCS("'"));
3877 }
3878 END_TEST
3879 
3880 /* Test that the parser stops processing the DTD after an unresolved
3881  * parameter entity is encountered.
3882  */
3883 START_TEST(test_dtd_stop_processing) {
3884   const char *text = "<!DOCTYPE doc [\n"
3885                      "%foo;\n"
3886                      "<!ENTITY bar 'bas'>\n"
3887                      "]><doc/>";
3888 
3889   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3890   init_dummy_handlers();
3891   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3892       == XML_STATUS_ERROR)
3893     xml_failure(g_parser);
3894   if (get_dummy_handler_flags() != 0)
3895     fail("DTD processing still going after undefined PE");
3896 }
3897 END_TEST
3898 
3899 /* Test public notations with no system ID */
3900 START_TEST(test_public_notation_no_sysid) {
3901   const char *text = "<!DOCTYPE doc [\n"
3902                      "<!NOTATION note PUBLIC 'foo'>\n"
3903                      "<!ELEMENT doc EMPTY>\n"
3904                      "]>\n<doc/>";
3905 
3906   init_dummy_handlers();
3907   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3908   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3909       == XML_STATUS_ERROR)
3910     xml_failure(g_parser);
3911   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3912     fail("Notation declaration handler not called");
3913 }
3914 END_TEST
3915 
3916 START_TEST(test_nested_groups) {
3917   const char *text
3918       = "<!DOCTYPE doc [\n"
3919         "<!ELEMENT doc "
3920         /* Sixteen elements per line */
3921         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3922         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3923         "))))))))))))))))))))))))))))))))>\n"
3924         "<!ELEMENT e EMPTY>"
3925         "]>\n"
3926         "<doc><e/></doc>";
3927   CharData storage;
3928 
3929   CharData_Init(&storage);
3930   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3931   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3932   XML_SetUserData(g_parser, &storage);
3933   init_dummy_handlers();
3934   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3935       == XML_STATUS_ERROR)
3936     xml_failure(g_parser);
3937   CharData_CheckXMLChars(&storage, XCS("doce"));
3938   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3939     fail("Element handler not fired");
3940 }
3941 END_TEST
3942 
3943 START_TEST(test_group_choice) {
3944   const char *text = "<!DOCTYPE doc [\n"
3945                      "<!ELEMENT doc (a|b|c)+>\n"
3946                      "<!ELEMENT a EMPTY>\n"
3947                      "<!ELEMENT b (#PCDATA)>\n"
3948                      "<!ELEMENT c ANY>\n"
3949                      "]>\n"
3950                      "<doc>\n"
3951                      "<a/>\n"
3952                      "<b attr='foo'>This is a foo</b>\n"
3953                      "<c></c>\n"
3954                      "</doc>\n";
3955 
3956   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3957   init_dummy_handlers();
3958   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3959       == XML_STATUS_ERROR)
3960     xml_failure(g_parser);
3961   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3962     fail("Element handler flag not raised");
3963 }
3964 END_TEST
3965 
3966 START_TEST(test_standalone_parameter_entity) {
3967   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3968                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3969                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3970                      "%entity;\n"
3971                      "]>\n"
3972                      "<doc></doc>";
3973   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3974 
3975   XML_SetUserData(g_parser, dtd_data);
3976   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3977   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3978   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3979       == XML_STATUS_ERROR)
3980     xml_failure(g_parser);
3981 }
3982 END_TEST
3983 
3984 /* Test skipping of parameter entity in an external DTD */
3985 /* Derived from ibm/invalid/P69/ibm69i01.xml */
3986 START_TEST(test_skipped_parameter_entity) {
3987   const char *text = "<?xml version='1.0'?>\n"
3988                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3989                      "<!ELEMENT root (#PCDATA|a)* >\n"
3990                      "]>\n"
3991                      "<root></root>";
3992   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3993 
3994   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3995   XML_SetUserData(g_parser, &dtd_data);
3996   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3997   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3998   init_dummy_handlers();
3999   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4000       == XML_STATUS_ERROR)
4001     xml_failure(g_parser);
4002   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
4003     fail("Skip handler not executed");
4004 }
4005 END_TEST
4006 
4007 /* Test recursive parameter entity definition rejected in external DTD */
4008 START_TEST(test_recursive_external_parameter_entity) {
4009   const char *text = "<?xml version='1.0'?>\n"
4010                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
4011                      "<!ELEMENT root (#PCDATA|a)* >\n"
4012                      "]>\n"
4013                      "<root></root>";
4014   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
4015                         "Recursive external parameter entity not faulted", NULL,
4016                         XML_ERROR_RECURSIVE_ENTITY_REF};
4017 
4018   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4019   XML_SetUserData(g_parser, &dtd_data);
4020   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4021   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4022                  "Recursive external parameter not spotted");
4023 }
4024 END_TEST
4025 
4026 /* Test undefined parameter entity in external entity handler */
4027 START_TEST(test_undefined_ext_entity_in_external_dtd) {
4028   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
4029                      "<doc></doc>\n";
4030 
4031   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4032   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
4033   XML_SetUserData(g_parser, NULL);
4034   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4035       == XML_STATUS_ERROR)
4036     xml_failure(g_parser);
4037 
4038   /* Now repeat without the external entity ref handler invoking
4039    * another copy of itself.
4040    */
4041   XML_ParserReset(g_parser, NULL);
4042   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4043   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
4044   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
4045   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4046       == XML_STATUS_ERROR)
4047     xml_failure(g_parser);
4048 }
4049 END_TEST
4050 
4051 /* Test suspending the parse on receiving an XML declaration works */
4052 START_TEST(test_suspend_xdecl) {
4053   const char *text = long_character_data_text;
4054 
4055   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
4056   XML_SetUserData(g_parser, g_parser);
4057   g_resumable = XML_TRUE;
4058   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4059   // we won't know exactly how much input we actually managed to give Expat.
4060   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4061       != XML_STATUS_SUSPENDED)
4062     xml_failure(g_parser);
4063   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
4064     xml_failure(g_parser);
4065   /* Attempt to start a new parse while suspended */
4066   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4067       != XML_STATUS_ERROR)
4068     fail("Attempt to parse while suspended not faulted");
4069   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
4070     fail("Suspended parse not faulted with correct error");
4071 }
4072 END_TEST
4073 
4074 /* Test aborting the parse in an epilog works */
4075 START_TEST(test_abort_epilog) {
4076   const char *text = "<doc></doc>\n\r\n";
4077   XML_Char trigger_char = XCS('\r');
4078 
4079   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4080   XML_SetUserData(g_parser, &trigger_char);
4081   g_resumable = XML_FALSE;
4082   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4083       != XML_STATUS_ERROR)
4084     fail("Abort not triggered");
4085   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
4086     xml_failure(g_parser);
4087 }
4088 END_TEST
4089 
4090 /* Test a different code path for abort in the epilog */
4091 START_TEST(test_abort_epilog_2) {
4092   const char *text = "<doc></doc>\n";
4093   XML_Char trigger_char = XCS('\n');
4094 
4095   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4096   XML_SetUserData(g_parser, &trigger_char);
4097   g_resumable = XML_FALSE;
4098   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
4099 }
4100 END_TEST
4101 
4102 /* Test suspension from the epilog */
4103 START_TEST(test_suspend_epilog) {
4104   const char *text = "<doc></doc>\n";
4105   XML_Char trigger_char = XCS('\n');
4106 
4107   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4108   XML_SetUserData(g_parser, &trigger_char);
4109   g_resumable = XML_TRUE;
4110   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4111       != XML_STATUS_SUSPENDED)
4112     xml_failure(g_parser);
4113 }
4114 END_TEST
4115 
4116 START_TEST(test_suspend_in_sole_empty_tag) {
4117   const char *text = "<doc/>";
4118   enum XML_Status rc;
4119 
4120   XML_SetEndElementHandler(g_parser, suspending_end_handler);
4121   XML_SetUserData(g_parser, g_parser);
4122   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
4123   if (rc == XML_STATUS_ERROR)
4124     xml_failure(g_parser);
4125   else if (rc != XML_STATUS_SUSPENDED)
4126     fail("Suspend not triggered");
4127   rc = XML_ResumeParser(g_parser);
4128   if (rc == XML_STATUS_ERROR)
4129     xml_failure(g_parser);
4130   else if (rc != XML_STATUS_OK)
4131     fail("Resume failed");
4132 }
4133 END_TEST
4134 
4135 START_TEST(test_unfinished_epilog) {
4136   const char *text = "<doc></doc><";
4137 
4138   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
4139                  "Incomplete epilog entry not faulted");
4140 }
4141 END_TEST
4142 
4143 START_TEST(test_partial_char_in_epilog) {
4144   const char *text = "<doc></doc>\xe2\x82";
4145 
4146   /* First check that no fault is raised if the parse is not finished */
4147   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
4148       == XML_STATUS_ERROR)
4149     xml_failure(g_parser);
4150   /* Now check that it is faulted once we finish */
4151   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
4152     fail("Partial character in epilog not faulted");
4153   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
4154     xml_failure(g_parser);
4155 }
4156 END_TEST
4157 
4158 /* Test resuming a parse suspended in entity substitution */
4159 START_TEST(test_suspend_resume_internal_entity) {
4160   const char *text
4161       = "<!DOCTYPE doc [\n"
4162         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
4163         "]>\n"
4164         "<doc>&foo;</doc>\n";
4165   const XML_Char *expected1 = XCS("Hi");
4166   const XML_Char *expected2 = XCS("HiHo");
4167   CharData storage;
4168 
4169   CharData_Init(&storage);
4170   XML_SetStartElementHandler(g_parser, start_element_suspender);
4171   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4172   XML_SetUserData(g_parser, &storage);
4173   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4174   // we won't know exactly how much input we actually managed to give Expat.
4175   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4176       != XML_STATUS_SUSPENDED)
4177     xml_failure(g_parser);
4178   CharData_CheckXMLChars(&storage, XCS(""));
4179   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
4180     xml_failure(g_parser);
4181   CharData_CheckXMLChars(&storage, expected1);
4182   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4183     xml_failure(g_parser);
4184   CharData_CheckXMLChars(&storage, expected2);
4185 }
4186 END_TEST
4187 
4188 START_TEST(test_suspend_resume_internal_entity_issue_629) {
4189   const char *const text
4190       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
4191         "<"
4192         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4193         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4194         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4195         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4196         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4197         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4198         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4199         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4200         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4201         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4202         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4203         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4204         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4205         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4206         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4207         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4208         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4209         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4210         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4211         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4212         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4213         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4214         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4215         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4216         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4217         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4218         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4219         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4220         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4221         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4222         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4223         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4224         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4225         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4226         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4227         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4228         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4229         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4230         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4231         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4232         "/>"
4233         "</b></a>";
4234   const size_t firstChunkSizeBytes = 54;
4235 
4236   XML_Parser parser = XML_ParserCreate(NULL);
4237   XML_SetUserData(parser, parser);
4238   XML_SetCommentHandler(parser, suspending_comment_handler);
4239 
4240   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
4241       != XML_STATUS_SUSPENDED)
4242     xml_failure(parser);
4243   if (XML_ResumeParser(parser) != XML_STATUS_OK)
4244     xml_failure(parser);
4245   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
4246                               (int)(strlen(text) - firstChunkSizeBytes),
4247                               XML_TRUE)
4248       != XML_STATUS_OK)
4249     xml_failure(parser);
4250   XML_ParserFree(parser);
4251 }
4252 END_TEST
4253 
4254 /* Test syntax error is caught at parse resumption */
4255 START_TEST(test_resume_entity_with_syntax_error) {
4256   if (g_chunkSize != 0) {
4257     // this test does not use SINGLE_BYTES, because of suspension
4258     return;
4259   }
4260 
4261   const char *text = "<!DOCTYPE doc [\n"
4262                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
4263                      "]>\n"
4264                      "<doc>&foo;</doc>\n";
4265 
4266   XML_SetStartElementHandler(g_parser, start_element_suspender);
4267   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4268   // we won't know exactly how much input we actually managed to give Expat.
4269   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4270       != XML_STATUS_SUSPENDED)
4271     xml_failure(g_parser);
4272   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
4273     fail("Syntax error in entity not faulted");
4274   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
4275     xml_failure(g_parser);
4276 }
4277 END_TEST
4278 
4279 /* Test suspending and resuming in a parameter entity substitution */
4280 START_TEST(test_suspend_resume_parameter_entity) {
4281   const char *text = "<!DOCTYPE doc [\n"
4282                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
4283                      "%foo;\n"
4284                      "]>\n"
4285                      "<doc>Hello, world</doc>";
4286   const XML_Char *expected = XCS("Hello, world");
4287   CharData storage;
4288 
4289   CharData_Init(&storage);
4290   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4291   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
4292   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4293   XML_SetUserData(g_parser, &storage);
4294   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4295       != XML_STATUS_SUSPENDED)
4296     xml_failure(g_parser);
4297   CharData_CheckXMLChars(&storage, XCS(""));
4298   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4299     xml_failure(g_parser);
4300   CharData_CheckXMLChars(&storage, expected);
4301 }
4302 END_TEST
4303 
4304 /* Test attempting to use parser after an error is faulted */
4305 START_TEST(test_restart_on_error) {
4306   const char *text = "<$doc><doc></doc>";
4307 
4308   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4309       != XML_STATUS_ERROR)
4310     fail("Invalid tag name not faulted");
4311   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4312     xml_failure(g_parser);
4313   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
4314     fail("Restarting invalid parse not faulted");
4315   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4316     xml_failure(g_parser);
4317 }
4318 END_TEST
4319 
4320 /* Test that angle brackets in an attribute default value are faulted */
4321 START_TEST(test_reject_lt_in_attribute_value) {
4322   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
4323                      "<doc></doc>";
4324 
4325   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4326                  "Bad attribute default not faulted");
4327 }
4328 END_TEST
4329 
4330 START_TEST(test_reject_unfinished_param_in_att_value) {
4331   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
4332                      "<doc></doc>";
4333 
4334   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4335                  "Bad attribute default not faulted");
4336 }
4337 END_TEST
4338 
4339 START_TEST(test_trailing_cr_in_att_value) {
4340   const char *text = "<doc a='value\r'/>";
4341 
4342   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4343       == XML_STATUS_ERROR)
4344     xml_failure(g_parser);
4345 }
4346 END_TEST
4347 
4348 /* Try parsing a general entity within a parameter entity in a
4349  * standalone internal DTD.  Covers a corner case in the parser.
4350  */
4351 START_TEST(test_standalone_internal_entity) {
4352   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
4353                      "<!DOCTYPE doc [\n"
4354                      "  <!ELEMENT doc (#PCDATA)>\n"
4355                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
4356                      "  <!ENTITY ge 'AttDefaultValue'>\n"
4357                      "  %pe;\n"
4358                      "]>\n"
4359                      "<doc att2='any'/>";
4360 
4361   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4362   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4363       == XML_STATUS_ERROR)
4364     xml_failure(g_parser);
4365 }
4366 END_TEST
4367 
4368 /* Test that a reference to an unknown external entity is skipped */
4369 START_TEST(test_skipped_external_entity) {
4370   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4371                      "<doc></doc>\n";
4372   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
4373                        "<!ENTITY % e2 '%e1;'>\n",
4374                        NULL, NULL};
4375 
4376   XML_SetUserData(g_parser, &test_data);
4377   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4378   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4379   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4380       == XML_STATUS_ERROR)
4381     xml_failure(g_parser);
4382 }
4383 END_TEST
4384 
4385 START_TEST(test_scaff_index_shared_across_external_entity_parser) {
4386   const char text[]
4387       = "<!DOCTYPE doc [\n"
4388         "<!ELEMENT a "
4389         "((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((b))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))>\n"
4390         "<!ENTITY % e SYSTEM 'ext'>\n"
4391         "%e;\n"
4392         "<!ELEMENT c "
4393         "(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((d)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))>\n"
4394         "]>\n"
4395         "<doc/>";
4396   ExtOption options[]
4397       = {{XCS("ext"),
4398           "<!ELEMENT x "
4399           "((((((((((((((((((((((((((((((((y))))))))))))))))))))))))))))))))>"},
4400          {NULL, NULL}};
4401 
4402   XML_Parser parser = XML_ParserCreate(NULL);
4403   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4404   XML_SetUserData(parser, options);
4405   XML_SetExternalEntityRefHandler(parser, external_entity_optioner);
4406   XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
4407 
4408   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
4409       == XML_STATUS_ERROR)
4410     xml_failure(parser);
4411 
4412   XML_ParserFree(parser);
4413 }
4414 END_TEST
4415 
4416 /* Test a different form of unknown external entity */
4417 START_TEST(test_skipped_null_loaded_ext_entity) {
4418   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4419                      "<doc />";
4420   ExtHdlrData test_data
4421       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4422          "<!ENTITY % pe2 '%pe1;'>\n"
4423          "%pe2;\n",
4424          external_entity_null_loader, NULL};
4425 
4426   XML_SetUserData(g_parser, &test_data);
4427   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4428   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4429   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4430       == XML_STATUS_ERROR)
4431     xml_failure(g_parser);
4432 }
4433 END_TEST
4434 
4435 START_TEST(test_skipped_unloaded_ext_entity) {
4436   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4437                      "<doc />";
4438   ExtHdlrData test_data
4439       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4440          "<!ENTITY % pe2 '%pe1;'>\n"
4441          "%pe2;\n",
4442          NULL, NULL};
4443 
4444   XML_SetUserData(g_parser, &test_data);
4445   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4446   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4447   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4448       == XML_STATUS_ERROR)
4449     xml_failure(g_parser);
4450 }
4451 END_TEST
4452 
4453 /* Test that a parameter entity value ending with a carriage return
4454  * has it translated internally into a newline.
4455  */
4456 START_TEST(test_param_entity_with_trailing_cr) {
4457 #define PARAM_ENTITY_NAME "pe"
4458 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
4459   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4460                      "<doc/>";
4461   ExtTest test_data
4462       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4463          "%" PARAM_ENTITY_NAME ";\n",
4464          NULL, NULL};
4465 
4466   XML_SetUserData(g_parser, &test_data);
4467   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4468   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4469   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4470   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4471                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4472   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4473       == XML_STATUS_ERROR)
4474     xml_failure(g_parser);
4475   int entity_match_flag = get_param_entity_match_flag();
4476   if (entity_match_flag == ENTITY_MATCH_FAIL)
4477     fail("Parameter entity CR->NEWLINE conversion failed");
4478   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4479     fail("Parameter entity not parsed");
4480 }
4481 #undef PARAM_ENTITY_NAME
4482 #undef PARAM_ENTITY_CORE_VALUE
4483 END_TEST
4484 
4485 START_TEST(test_invalid_character_entity) {
4486   const char *text = "<!DOCTYPE doc [\n"
4487                      "  <!ENTITY entity '&#x110000;'>\n"
4488                      "]>\n"
4489                      "<doc>&entity;</doc>";
4490 
4491   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4492                  "Out of range character reference not faulted");
4493 }
4494 END_TEST
4495 
4496 START_TEST(test_invalid_character_entity_2) {
4497   const char *text = "<!DOCTYPE doc [\n"
4498                      "  <!ENTITY entity '&#xg0;'>\n"
4499                      "]>\n"
4500                      "<doc>&entity;</doc>";
4501 
4502   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4503                  "Out of range character reference not faulted");
4504 }
4505 END_TEST
4506 
4507 START_TEST(test_invalid_character_entity_3) {
4508   const char text[] =
4509       /* <!DOCTYPE doc [\n */
4510       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4511       /* U+0E04 = KHO KHWAI
4512        * U+0E08 = CHO CHAN */
4513       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4514       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4515       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4516       /* ]>\n */
4517       "\0]\0>\0\n"
4518       /* <doc>&entity;</doc> */
4519       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4520 
4521   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4522       != XML_STATUS_ERROR)
4523     fail("Invalid start of entity name not faulted");
4524   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4525     xml_failure(g_parser);
4526 }
4527 END_TEST
4528 
4529 START_TEST(test_invalid_character_entity_4) {
4530   const char *text = "<!DOCTYPE doc [\n"
4531                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4532                      "]>\n"
4533                      "<doc>&entity;</doc>";
4534 
4535   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4536                  "Out of range character reference not faulted");
4537 }
4538 END_TEST
4539 
4540 /* Test that processing instructions are picked up by a default handler */
4541 START_TEST(test_pi_handled_in_default) {
4542   const char *text = "<?test processing instruction?>\n<doc/>";
4543   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4544   CharData storage;
4545 
4546   CharData_Init(&storage);
4547   XML_SetDefaultHandler(g_parser, accumulate_characters);
4548   XML_SetUserData(g_parser, &storage);
4549   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4550       == XML_STATUS_ERROR)
4551     xml_failure(g_parser);
4552   CharData_CheckXMLChars(&storage, expected);
4553 }
4554 END_TEST
4555 
4556 /* Test that comments are picked up by a default handler */
4557 START_TEST(test_comment_handled_in_default) {
4558   const char *text = "<!-- This is a comment -->\n<doc/>";
4559   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4560   CharData storage;
4561 
4562   CharData_Init(&storage);
4563   XML_SetDefaultHandler(g_parser, accumulate_characters);
4564   XML_SetUserData(g_parser, &storage);
4565   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4566       == XML_STATUS_ERROR)
4567     xml_failure(g_parser);
4568   CharData_CheckXMLChars(&storage, expected);
4569 }
4570 END_TEST
4571 
4572 /* Test PIs that look almost but not quite like XML declarations */
4573 START_TEST(test_pi_yml) {
4574   const char *text = "<?yml something like data?><doc/>";
4575   const XML_Char *expected = XCS("yml: something like data\n");
4576   CharData storage;
4577 
4578   CharData_Init(&storage);
4579   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4580   XML_SetUserData(g_parser, &storage);
4581   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4582       == XML_STATUS_ERROR)
4583     xml_failure(g_parser);
4584   CharData_CheckXMLChars(&storage, expected);
4585 }
4586 END_TEST
4587 
4588 START_TEST(test_pi_xnl) {
4589   const char *text = "<?xnl nothing like data?><doc/>";
4590   const XML_Char *expected = XCS("xnl: nothing like data\n");
4591   CharData storage;
4592 
4593   CharData_Init(&storage);
4594   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4595   XML_SetUserData(g_parser, &storage);
4596   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4597       == XML_STATUS_ERROR)
4598     xml_failure(g_parser);
4599   CharData_CheckXMLChars(&storage, expected);
4600 }
4601 END_TEST
4602 
4603 START_TEST(test_pi_xmm) {
4604   const char *text = "<?xmm everything like data?><doc/>";
4605   const XML_Char *expected = XCS("xmm: everything like data\n");
4606   CharData storage;
4607 
4608   CharData_Init(&storage);
4609   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4610   XML_SetUserData(g_parser, &storage);
4611   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4612       == XML_STATUS_ERROR)
4613     xml_failure(g_parser);
4614   CharData_CheckXMLChars(&storage, expected);
4615 }
4616 END_TEST
4617 
4618 START_TEST(test_utf16_pi) {
4619   const char text[] =
4620       /* <?{KHO KHWAI}{CHO CHAN}?>
4621        * where {KHO KHWAI} = U+0E04
4622        * and   {CHO CHAN}  = U+0E08
4623        */
4624       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4625       /* <q/> */
4626       "<\0q\0/\0>\0";
4627 #ifdef XML_UNICODE
4628   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4629 #else
4630   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4631 #endif
4632   CharData storage;
4633 
4634   CharData_Init(&storage);
4635   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4636   XML_SetUserData(g_parser, &storage);
4637   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4638       == XML_STATUS_ERROR)
4639     xml_failure(g_parser);
4640   CharData_CheckXMLChars(&storage, expected);
4641 }
4642 END_TEST
4643 
4644 START_TEST(test_utf16_be_pi) {
4645   const char text[] =
4646       /* <?{KHO KHWAI}{CHO CHAN}?>
4647        * where {KHO KHWAI} = U+0E04
4648        * and   {CHO CHAN}  = U+0E08
4649        */
4650       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4651       /* <q/> */
4652       "\0<\0q\0/\0>";
4653 #ifdef XML_UNICODE
4654   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4655 #else
4656   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4657 #endif
4658   CharData storage;
4659 
4660   CharData_Init(&storage);
4661   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4662   XML_SetUserData(g_parser, &storage);
4663   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4664       == XML_STATUS_ERROR)
4665     xml_failure(g_parser);
4666   CharData_CheckXMLChars(&storage, expected);
4667 }
4668 END_TEST
4669 
4670 /* Test that comments can be picked up and translated */
4671 START_TEST(test_utf16_be_comment) {
4672   const char text[] =
4673       /* <!-- Comment A --> */
4674       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4675       /* <doc/> */
4676       "\0<\0d\0o\0c\0/\0>";
4677   const XML_Char *expected = XCS(" Comment A ");
4678   CharData storage;
4679 
4680   CharData_Init(&storage);
4681   XML_SetCommentHandler(g_parser, accumulate_comment);
4682   XML_SetUserData(g_parser, &storage);
4683   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4684       == XML_STATUS_ERROR)
4685     xml_failure(g_parser);
4686   CharData_CheckXMLChars(&storage, expected);
4687 }
4688 END_TEST
4689 
4690 START_TEST(test_utf16_le_comment) {
4691   const char text[] =
4692       /* <!-- Comment B --> */
4693       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4694       /* <doc/> */
4695       "<\0d\0o\0c\0/\0>\0";
4696   const XML_Char *expected = XCS(" Comment B ");
4697   CharData storage;
4698 
4699   CharData_Init(&storage);
4700   XML_SetCommentHandler(g_parser, accumulate_comment);
4701   XML_SetUserData(g_parser, &storage);
4702   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4703       == XML_STATUS_ERROR)
4704     xml_failure(g_parser);
4705   CharData_CheckXMLChars(&storage, expected);
4706 }
4707 END_TEST
4708 
4709 /* Test that the unknown encoding handler with map entries that expect
4710  * conversion but no conversion function is faulted
4711  */
4712 START_TEST(test_missing_encoding_conversion_fn) {
4713   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4714                      "<doc>\x81</doc>";
4715 
4716   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4717   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4718    * character introducing a two-byte sequence.  For this, it
4719    * requires a convert function.  The above function call doesn't
4720    * pass one through, so when BadEncodingHandler actually gets
4721    * called it should supply an invalid encoding.
4722    */
4723   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4724                  "Encoding with missing convert() not faulted");
4725 }
4726 END_TEST
4727 
4728 START_TEST(test_failing_encoding_conversion_fn) {
4729   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4730                      "<doc>\x81</doc>";
4731 
4732   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4733   /* BadEncodingHandler sets up an encoding with every top-bit-set
4734    * character introducing a two-byte sequence.  For this, it
4735    * requires a convert function.  The above function call passes
4736    * one that insists all possible sequences are invalid anyway.
4737    */
4738   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4739                  "Encoding with failing convert() not faulted");
4740 }
4741 END_TEST
4742 
4743 /* Test unknown encoding conversions */
4744 START_TEST(test_unknown_encoding_success) {
4745   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4746                      /* Equivalent to <eoc>Hello, world</eoc> */
4747                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4748 
4749   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4750   run_character_check(text, XCS("Hello, world"));
4751 }
4752 END_TEST
4753 
4754 /* Test bad name character in unknown encoding */
4755 START_TEST(test_unknown_encoding_bad_name) {
4756   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4757                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4758 
4759   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4760   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4761                  "Bad name start in unknown encoding not faulted");
4762 }
4763 END_TEST
4764 
4765 /* Test bad mid-name character in unknown encoding */
4766 START_TEST(test_unknown_encoding_bad_name_2) {
4767   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4768                      "<d\xffoc>Hello, world</d\xffoc>";
4769 
4770   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4771   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4772                  "Bad name in unknown encoding not faulted");
4773 }
4774 END_TEST
4775 
4776 /* Test element name that is long enough to fill the conversion buffer
4777  * in an unknown encoding, finishing with an encoded character.
4778  */
4779 START_TEST(test_unknown_encoding_long_name_1) {
4780   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4781                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4782                      "Hi"
4783                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4784   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4785   CharData storage;
4786 
4787   CharData_Init(&storage);
4788   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4789   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4790   XML_SetUserData(g_parser, &storage);
4791   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4792       == XML_STATUS_ERROR)
4793     xml_failure(g_parser);
4794   CharData_CheckXMLChars(&storage, expected);
4795 }
4796 END_TEST
4797 
4798 /* Test element name that is long enough to fill the conversion buffer
4799  * in an unknown encoding, finishing with an simple character.
4800  */
4801 START_TEST(test_unknown_encoding_long_name_2) {
4802   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4803                      "<abcdefghabcdefghabcdefghijklmnop>"
4804                      "Hi"
4805                      "</abcdefghabcdefghabcdefghijklmnop>";
4806   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4807   CharData storage;
4808 
4809   CharData_Init(&storage);
4810   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4811   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4812   XML_SetUserData(g_parser, &storage);
4813   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4814       == XML_STATUS_ERROR)
4815     xml_failure(g_parser);
4816   CharData_CheckXMLChars(&storage, expected);
4817 }
4818 END_TEST
4819 
4820 START_TEST(test_invalid_unknown_encoding) {
4821   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4822                      "<doc>Hello world</doc>";
4823 
4824   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4825   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4826                  "Invalid unknown encoding not faulted");
4827 }
4828 END_TEST
4829 
4830 START_TEST(test_unknown_ascii_encoding_ok) {
4831   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4832                      "<doc>Hello, world</doc>";
4833 
4834   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4835   run_character_check(text, XCS("Hello, world"));
4836 }
4837 END_TEST
4838 
4839 START_TEST(test_unknown_ascii_encoding_fail) {
4840   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4841                      "<doc>Hello, \x80 world</doc>";
4842 
4843   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4844   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4845                  "Invalid character not faulted");
4846 }
4847 END_TEST
4848 
4849 START_TEST(test_unknown_encoding_invalid_length) {
4850   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4851                      "<doc>Hello, world</doc>";
4852 
4853   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4854   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4855                  "Invalid unknown encoding not faulted");
4856 }
4857 END_TEST
4858 
4859 START_TEST(test_unknown_encoding_invalid_topbit) {
4860   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4861                      "<doc>Hello, world</doc>";
4862 
4863   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4864   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4865                  "Invalid unknown encoding not faulted");
4866 }
4867 END_TEST
4868 
4869 START_TEST(test_unknown_encoding_invalid_surrogate) {
4870   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4871                      "<doc>Hello, \x82 world</doc>";
4872 
4873   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4874   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4875                  "Invalid unknown encoding not faulted");
4876 }
4877 END_TEST
4878 
4879 START_TEST(test_unknown_encoding_invalid_high) {
4880   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4881                      "<doc>Hello, world</doc>";
4882 
4883   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4884   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4885                  "Invalid unknown encoding not faulted");
4886 }
4887 END_TEST
4888 
4889 START_TEST(test_unknown_encoding_invalid_attr_value) {
4890   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4891                      "<doc attr='\xff\x30'/>";
4892 
4893   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4894   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4895                  "Invalid attribute valid not faulted");
4896 }
4897 END_TEST
4898 
4899 START_TEST(test_unknown_encoding_user_data_primary) {
4900   // This test is based on ideas contributed by Artiphishell Inc.
4901   const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n"
4902                            "<root />\n";
4903   XML_Parser parser = XML_ParserCreate(NULL);
4904   XML_SetUnknownEncodingHandler(parser,
4905                                 user_data_checking_unknown_encoding_handler,
4906                                 (void *)(intptr_t)0xC0FFEE);
4907 
4908   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
4909               == XML_STATUS_OK);
4910 
4911   XML_ParserFree(parser);
4912 }
4913 END_TEST
4914 
4915 START_TEST(test_unknown_encoding_user_data_secondary) {
4916   // This test is based on ideas contributed by Artiphishell Inc.
4917   const char *const text_main = "<!DOCTYPE r [\n"
4918                                 "  <!ENTITY ext SYSTEM 'ext.ent'>\n"
4919                                 "]>\n"
4920                                 "<r>&ext;</r>\n";
4921   const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n"
4922                                     "<e>data</e>";
4923   ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL};
4924   XML_Parser parser = XML_ParserCreate(NULL);
4925   XML_SetExternalEntityRefHandler(parser, external_entity_loader2);
4926   XML_SetUnknownEncodingHandler(parser,
4927                                 user_data_checking_unknown_encoding_handler,
4928                                 (void *)(intptr_t)0xC0FFEE);
4929   XML_SetUserData(parser, &test_data);
4930 
4931   assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main),
4932                                       XML_TRUE)
4933               == XML_STATUS_OK);
4934 
4935   XML_ParserFree(parser);
4936 }
4937 END_TEST
4938 
4939 /* Test an external entity parser set to use latin-1 detects UTF-16
4940  * BOMs correctly.
4941  */
4942 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
4943 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4944   const char *text = "<!DOCTYPE doc [\n"
4945                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4946                      "]>\n"
4947                      "<doc>&en;</doc>";
4948   ExtTest2 test_data
4949       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4950          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4951           *   0x4c = L and 0x20 is a space
4952           */
4953          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4954 #ifdef XML_UNICODE
4955   const XML_Char *expected = XCS("\x00ff\x00feL ");
4956 #else
4957   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4958   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4959 #endif
4960   CharData storage;
4961 
4962   CharData_Init(&storage);
4963   test_data.storage = &storage;
4964   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4965   XML_SetUserData(g_parser, &test_data);
4966   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4967   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4968       == XML_STATUS_ERROR)
4969     xml_failure(g_parser);
4970   CharData_CheckXMLChars(&storage, expected);
4971 }
4972 END_TEST
4973 
4974 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4975   const char *text = "<!DOCTYPE doc [\n"
4976                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4977                      "]>\n"
4978                      "<doc>&en;</doc>";
4979   ExtTest2 test_data
4980       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4981          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4982           *   0x4c = L and 0x20 is a space
4983           */
4984          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4985 #ifdef XML_UNICODE
4986   const XML_Char *expected = XCS("\x00fe\x00ff L");
4987 #else
4988   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4989   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4990 #endif
4991   CharData storage;
4992 
4993   CharData_Init(&storage);
4994   test_data.storage = &storage;
4995   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4996   XML_SetUserData(g_parser, &test_data);
4997   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4998   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4999       == XML_STATUS_ERROR)
5000     xml_failure(g_parser);
5001   CharData_CheckXMLChars(&storage, expected);
5002 }
5003 END_TEST
5004 
5005 /* Parsing the full buffer rather than a byte at a time makes a
5006  * difference to the encoding scanning code, so repeat the above tests
5007  * without breaking them down by byte.
5008  */
5009 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
5010   const char *text = "<!DOCTYPE doc [\n"
5011                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5012                      "]>\n"
5013                      "<doc>&en;</doc>";
5014   ExtTest2 test_data
5015       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
5016          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
5017           *   0x4c = L and 0x20 is a space
5018           */
5019          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
5020 #ifdef XML_UNICODE
5021   const XML_Char *expected = XCS("\x00ff\x00feL ");
5022 #else
5023   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
5024   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
5025 #endif
5026   CharData storage;
5027 
5028   CharData_Init(&storage);
5029   test_data.storage = &storage;
5030   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5031   XML_SetUserData(g_parser, &test_data);
5032   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5033   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5034       == XML_STATUS_ERROR)
5035     xml_failure(g_parser);
5036   CharData_CheckXMLChars(&storage, expected);
5037 }
5038 END_TEST
5039 
5040 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
5041   const char *text = "<!DOCTYPE doc [\n"
5042                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5043                      "]>\n"
5044                      "<doc>&en;</doc>";
5045   ExtTest2 test_data
5046       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
5047          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
5048           *   0x4c = L and 0x20 is a space
5049           */
5050          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
5051 #ifdef XML_UNICODE
5052   const XML_Char *expected = XCS("\x00fe\x00ff L");
5053 #else
5054   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
5055   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
5056 #endif
5057   CharData storage;
5058 
5059   CharData_Init(&storage);
5060   test_data.storage = &storage;
5061   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5062   XML_SetUserData(g_parser, &test_data);
5063   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5064   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5065       == XML_STATUS_ERROR)
5066     xml_failure(g_parser);
5067   CharData_CheckXMLChars(&storage, expected);
5068 }
5069 END_TEST
5070 
5071 /* Test little-endian UTF-16 given an explicit big-endian encoding */
5072 START_TEST(test_ext_entity_utf16_be) {
5073   const char *text = "<!DOCTYPE doc [\n"
5074                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5075                      "]>\n"
5076                      "<doc>&en;</doc>";
5077   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
5078 #ifdef XML_UNICODE
5079   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
5080 #else
5081   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
5082                                  "\xe6\x94\x80"   /* U+6500 */
5083                                  "\xe2\xbc\x80"   /* U+2F00 */
5084                                  "\xe3\xb8\x80"); /* U+3E00 */
5085 #endif
5086   CharData storage;
5087 
5088   CharData_Init(&storage);
5089   test_data.storage = &storage;
5090   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5091   XML_SetUserData(g_parser, &test_data);
5092   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5093   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5094       == XML_STATUS_ERROR)
5095     xml_failure(g_parser);
5096   CharData_CheckXMLChars(&storage, expected);
5097 }
5098 END_TEST
5099 
5100 /* Test big-endian UTF-16 given an explicit little-endian encoding */
5101 START_TEST(test_ext_entity_utf16_le) {
5102   const char *text = "<!DOCTYPE doc [\n"
5103                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5104                      "]>\n"
5105                      "<doc>&en;</doc>";
5106   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
5107 #ifdef XML_UNICODE
5108   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
5109 #else
5110   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
5111                                  "\xe6\x94\x80"   /* U+6500 */
5112                                  "\xe2\xbc\x80"   /* U+2F00 */
5113                                  "\xe3\xb8\x80"); /* U+3E00 */
5114 #endif
5115   CharData storage;
5116 
5117   CharData_Init(&storage);
5118   test_data.storage = &storage;
5119   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5120   XML_SetUserData(g_parser, &test_data);
5121   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5122   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5123       == XML_STATUS_ERROR)
5124     xml_failure(g_parser);
5125   CharData_CheckXMLChars(&storage, expected);
5126 }
5127 END_TEST
5128 
5129 /* Test little-endian UTF-16 given no explicit encoding.
5130  * The existing default encoding (UTF-8) is assumed to hold without a
5131  * BOM to contradict it, so the entity value will in fact provoke an
5132  * error because 0x00 is not a valid XML character.  We parse the
5133  * whole buffer in one go rather than feeding it in byte by byte to
5134  * exercise different code paths in the initial scanning routines.
5135  */
5136 START_TEST(test_ext_entity_utf16_unknown) {
5137   const char *text = "<!DOCTYPE doc [\n"
5138                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5139                      "]>\n"
5140                      "<doc>&en;</doc>";
5141   ExtFaults2 test_data
5142       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
5143          XML_ERROR_INVALID_TOKEN};
5144 
5145   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
5146   XML_SetUserData(g_parser, &test_data);
5147   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5148                  "Invalid character should not have been accepted");
5149 }
5150 END_TEST
5151 
5152 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
5153 START_TEST(test_ext_entity_utf8_non_bom) {
5154   const char *text = "<!DOCTYPE doc [\n"
5155                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5156                      "]>\n"
5157                      "<doc>&en;</doc>";
5158   ExtTest2 test_data
5159       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
5160          3, NULL, NULL};
5161 #ifdef XML_UNICODE
5162   const XML_Char *expected = XCS("\xfec0");
5163 #else
5164   const XML_Char *expected = XCS("\xef\xbb\x80");
5165 #endif
5166   CharData storage;
5167 
5168   CharData_Init(&storage);
5169   test_data.storage = &storage;
5170   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5171   XML_SetUserData(g_parser, &test_data);
5172   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5173   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5174       == XML_STATUS_ERROR)
5175     xml_failure(g_parser);
5176   CharData_CheckXMLChars(&storage, expected);
5177 }
5178 END_TEST
5179 
5180 /* Test that UTF-8 in a CDATA section is correctly passed through */
5181 START_TEST(test_utf8_in_cdata_section) {
5182   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
5183 #ifdef XML_UNICODE
5184   const XML_Char *expected = XCS("one \x00e9 two");
5185 #else
5186   const XML_Char *expected = XCS("one \xc3\xa9 two");
5187 #endif
5188 
5189   run_character_check(text, expected);
5190 }
5191 END_TEST
5192 
5193 /* Test that little-endian UTF-16 in a CDATA section is handled */
5194 START_TEST(test_utf8_in_cdata_section_2) {
5195   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
5196 #ifdef XML_UNICODE
5197   const XML_Char *expected = XCS("\x00e9]\x00e9two");
5198 #else
5199   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
5200 #endif
5201 
5202   run_character_check(text, expected);
5203 }
5204 END_TEST
5205 
5206 START_TEST(test_utf8_in_start_tags) {
5207   struct test_case {
5208     bool goodName;
5209     bool goodNameStart;
5210     const char *tagName;
5211   };
5212 
5213   // The idea with the tests below is this:
5214   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
5215   // go to isNever and are hence not a concern.
5216   //
5217   // We start with a character that is a valid name character
5218   // (or even name-start character, see XML 1.0r4 spec) and then we flip
5219   // single bits at places where (1) the result leaves the UTF-8 encoding space
5220   // and (2) we stay in the same n-byte sequence family.
5221   //
5222   // The flipped bits are highlighted in angle brackets in comments,
5223   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
5224   // the most significant bit to 1 to leave UTF-8 encoding space.
5225   struct test_case cases[] = {
5226       // 1-byte UTF-8: [0xxx xxxx]
5227       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
5228       {false, false, "\xBA"}, // [<1>011 1010]
5229       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
5230       {false, false, "\xB9"}, // [<1>011 1001]
5231 
5232       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
5233       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
5234                                   // Arabic small waw U+06E5
5235       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
5236       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
5237       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
5238       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
5239                                   // combining char U+0301
5240       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
5241       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
5242       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
5243 
5244       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
5245       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
5246                                       // Devanagari Letter A U+0905
5247       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
5248       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
5249       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
5250       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
5251       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
5252       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
5253                                       // combining char U+0901
5254       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
5255       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
5256       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
5257       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
5258       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
5259   };
5260   const bool atNameStart[] = {true, false};
5261 
5262   size_t i = 0;
5263   char doc[1024];
5264   size_t failCount = 0;
5265 
5266   // we need all the bytes to be parsed, but we don't want the errors that can
5267   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
5268   if (g_reparseDeferralEnabledDefault) {
5269     return;
5270   }
5271 
5272   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
5273     size_t j = 0;
5274     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
5275       const bool expectedSuccess
5276           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
5277       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
5278                cases[i].tagName);
5279       XML_Parser parser = XML_ParserCreate(NULL);
5280 
5281       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
5282           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
5283 
5284       bool success = true;
5285       if ((status == XML_STATUS_OK) != expectedSuccess) {
5286         success = false;
5287       }
5288       if ((status == XML_STATUS_ERROR)
5289           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
5290         success = false;
5291       }
5292 
5293       if (! success) {
5294         fprintf(
5295             stderr,
5296             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
5297             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
5298             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
5299         failCount++;
5300       }
5301 
5302       XML_ParserFree(parser);
5303     }
5304   }
5305 
5306   if (failCount > 0) {
5307     fail("UTF-8 regression detected");
5308   }
5309 }
5310 END_TEST
5311 
5312 /* Test trailing spaces in elements are accepted */
5313 START_TEST(test_trailing_spaces_in_elements) {
5314   const char *text = "<doc   >Hi</doc >";
5315   const XML_Char *expected = XCS("doc/doc");
5316   CharData storage;
5317 
5318   CharData_Init(&storage);
5319   XML_SetElementHandler(g_parser, record_element_start_handler,
5320                         record_element_end_handler);
5321   XML_SetUserData(g_parser, &storage);
5322   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5323       == XML_STATUS_ERROR)
5324     xml_failure(g_parser);
5325   CharData_CheckXMLChars(&storage, expected);
5326 }
5327 END_TEST
5328 
5329 START_TEST(test_utf16_attribute) {
5330   const char text[] =
5331       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
5332        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5333        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5334        */
5335       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
5336   const XML_Char *expected = XCS("a");
5337   CharData storage;
5338 
5339   CharData_Init(&storage);
5340   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5341   XML_SetUserData(g_parser, &storage);
5342   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5343       == XML_STATUS_ERROR)
5344     xml_failure(g_parser);
5345   CharData_CheckXMLChars(&storage, expected);
5346 }
5347 END_TEST
5348 
5349 START_TEST(test_utf16_second_attr) {
5350   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
5351    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5352    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5353    */
5354   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
5355                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
5356   const XML_Char *expected = XCS("1");
5357   CharData storage;
5358 
5359   CharData_Init(&storage);
5360   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5361   XML_SetUserData(g_parser, &storage);
5362   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5363       == XML_STATUS_ERROR)
5364     xml_failure(g_parser);
5365   CharData_CheckXMLChars(&storage, expected);
5366 }
5367 END_TEST
5368 
5369 START_TEST(test_attr_after_solidus) {
5370   const char *text = "<doc attr1='a' / attr2='b'>";
5371 
5372   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
5373 }
5374 END_TEST
5375 
5376 START_TEST(test_utf16_pe) {
5377   /* <!DOCTYPE doc [
5378    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
5379    * %{KHO KHWAI}{CHO CHAN};
5380    * ]>
5381    * <doc></doc>
5382    *
5383    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5384    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5385    */
5386   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
5387                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
5388                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
5389                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
5390                       "\0%\x0e\x04\x0e\x08\0;\0\n"
5391                       "\0]\0>\0\n"
5392                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
5393 #ifdef XML_UNICODE
5394   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
5395 #else
5396   const XML_Char *expected
5397       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
5398 #endif
5399   CharData storage;
5400 
5401   CharData_Init(&storage);
5402   XML_SetUserData(g_parser, &storage);
5403   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
5404   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5405       == XML_STATUS_ERROR)
5406     xml_failure(g_parser);
5407   CharData_CheckXMLChars(&storage, expected);
5408 }
5409 END_TEST
5410 
5411 /* Test that duff attribute description keywords are rejected */
5412 START_TEST(test_bad_attr_desc_keyword) {
5413   const char *text = "<!DOCTYPE doc [\n"
5414                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
5415                      "]>\n"
5416                      "<doc />";
5417 
5418   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5419                  "Bad keyword !IMPLIED not faulted");
5420 }
5421 END_TEST
5422 
5423 /* Test that an invalid attribute description keyword consisting of
5424  * UTF-16 characters with their top bytes non-zero are correctly
5425  * faulted
5426  */
5427 START_TEST(test_bad_attr_desc_keyword_utf16) {
5428   /* <!DOCTYPE d [
5429    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
5430    * ]><d/>
5431    *
5432    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5433    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5434    */
5435   const char text[]
5436       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5437         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
5438         "\0#\x0e\x04\x0e\x08\0>\0\n"
5439         "\0]\0>\0<\0d\0/\0>";
5440 
5441   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5442       != XML_STATUS_ERROR)
5443     fail("Invalid UTF16 attribute keyword not faulted");
5444   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5445     xml_failure(g_parser);
5446 }
5447 END_TEST
5448 
5449 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
5450  * using prefix-encoding (see above) to trigger specific code paths
5451  */
5452 START_TEST(test_bad_doctype) {
5453   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
5454                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
5455 
5456   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5457   expect_failure(text, XML_ERROR_SYNTAX,
5458                  "Invalid bytes in DOCTYPE not faulted");
5459 }
5460 END_TEST
5461 
5462 START_TEST(test_bad_doctype_utf8) {
5463   const char *text = "<!DOCTYPE \xDB\x25"
5464                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
5465   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5466                  "Invalid UTF-8 in DOCTYPE not faulted");
5467 }
5468 END_TEST
5469 
5470 START_TEST(test_bad_doctype_utf16) {
5471   const char text[] =
5472       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
5473        *
5474        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
5475        * (name character) but not a valid letter (name start character)
5476        */
5477       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
5478       "\x06\xf2"
5479       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
5480 
5481   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5482       != XML_STATUS_ERROR)
5483     fail("Invalid bytes in DOCTYPE not faulted");
5484   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5485     xml_failure(g_parser);
5486 }
5487 END_TEST
5488 
5489 START_TEST(test_bad_doctype_plus) {
5490   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
5491                      "<1+>&foo;</1+>";
5492 
5493   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5494                  "'+' in document name not faulted");
5495 }
5496 END_TEST
5497 
5498 START_TEST(test_bad_doctype_star) {
5499   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
5500                      "<1*>&foo;</1*>";
5501 
5502   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5503                  "'*' in document name not faulted");
5504 }
5505 END_TEST
5506 
5507 START_TEST(test_bad_doctype_query) {
5508   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5509                      "<1?>&foo;</1?>";
5510 
5511   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5512                  "'?' in document name not faulted");
5513 }
5514 END_TEST
5515 
5516 START_TEST(test_unknown_encoding_bad_ignore) {
5517   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5518                      "<!DOCTYPE doc SYSTEM 'foo'>"
5519                      "<doc><e>&entity;</e></doc>";
5520   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5521                      "Invalid character not faulted", XCS("prefix-conv"),
5522                      XML_ERROR_INVALID_TOKEN};
5523 
5524   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5525   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5526   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5527   XML_SetUserData(g_parser, &fault);
5528   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5529                  "Bad IGNORE section with unknown encoding not failed");
5530 }
5531 END_TEST
5532 
5533 START_TEST(test_entity_in_utf16_be_attr) {
5534   const char text[] =
5535       /* <e a='&#228; &#x00E4;'></e> */
5536       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5537       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5538 #ifdef XML_UNICODE
5539   const XML_Char *expected = XCS("\x00e4 \x00e4");
5540 #else
5541   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5542 #endif
5543   CharData storage;
5544 
5545   CharData_Init(&storage);
5546   XML_SetUserData(g_parser, &storage);
5547   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5548   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5549       == XML_STATUS_ERROR)
5550     xml_failure(g_parser);
5551   CharData_CheckXMLChars(&storage, expected);
5552 }
5553 END_TEST
5554 
5555 START_TEST(test_entity_in_utf16_le_attr) {
5556   const char text[] =
5557       /* <e a='&#228; &#x00E4;'></e> */
5558       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5559       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5560 #ifdef XML_UNICODE
5561   const XML_Char *expected = XCS("\x00e4 \x00e4");
5562 #else
5563   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5564 #endif
5565   CharData storage;
5566 
5567   CharData_Init(&storage);
5568   XML_SetUserData(g_parser, &storage);
5569   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5570   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5571       == XML_STATUS_ERROR)
5572     xml_failure(g_parser);
5573   CharData_CheckXMLChars(&storage, expected);
5574 }
5575 END_TEST
5576 
5577 START_TEST(test_entity_public_utf16_be) {
5578   const char text[] =
5579       /* <!DOCTYPE d [ */
5580       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5581       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5582       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5583       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5584       /* %e; */
5585       "\0%\0e\0;\0\n"
5586       /* ]> */
5587       "\0]\0>\0\n"
5588       /* <d>&j;</d> */
5589       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5590   ExtTest2 test_data
5591       = {/* <!ENTITY j 'baz'> */
5592          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5593   const XML_Char *expected = XCS("baz");
5594   CharData storage;
5595 
5596   CharData_Init(&storage);
5597   test_data.storage = &storage;
5598   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5599   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5600   XML_SetUserData(g_parser, &test_data);
5601   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5602   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5603       == XML_STATUS_ERROR)
5604     xml_failure(g_parser);
5605   CharData_CheckXMLChars(&storage, expected);
5606 }
5607 END_TEST
5608 
5609 START_TEST(test_entity_public_utf16_le) {
5610   const char text[] =
5611       /* <!DOCTYPE d [ */
5612       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5613       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5614       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5615       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5616       /* %e; */
5617       "%\0e\0;\0\n\0"
5618       /* ]> */
5619       "]\0>\0\n\0"
5620       /* <d>&j;</d> */
5621       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5622   ExtTest2 test_data
5623       = {/* <!ENTITY j 'baz'> */
5624          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5625   const XML_Char *expected = XCS("baz");
5626   CharData storage;
5627 
5628   CharData_Init(&storage);
5629   test_data.storage = &storage;
5630   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5631   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5632   XML_SetUserData(g_parser, &test_data);
5633   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5634   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5635       == XML_STATUS_ERROR)
5636     xml_failure(g_parser);
5637   CharData_CheckXMLChars(&storage, expected);
5638 }
5639 END_TEST
5640 
5641 /* Test that a doctype with neither an internal nor external subset is
5642  * faulted
5643  */
5644 START_TEST(test_short_doctype) {
5645   const char *text = "<!DOCTYPE doc></doc>";
5646   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5647                  "DOCTYPE without subset not rejected");
5648 }
5649 END_TEST
5650 
5651 START_TEST(test_short_doctype_2) {
5652   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5653   expect_failure(text, XML_ERROR_SYNTAX,
5654                  "DOCTYPE without Public ID not rejected");
5655 }
5656 END_TEST
5657 
5658 START_TEST(test_short_doctype_3) {
5659   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5660   expect_failure(text, XML_ERROR_SYNTAX,
5661                  "DOCTYPE without System ID not rejected");
5662 }
5663 END_TEST
5664 
5665 START_TEST(test_long_doctype) {
5666   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5667   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5668 }
5669 END_TEST
5670 
5671 START_TEST(test_bad_entity) {
5672   const char *text = "<!DOCTYPE doc [\n"
5673                      "  <!ENTITY foo PUBLIC>\n"
5674                      "]>\n"
5675                      "<doc/>";
5676   expect_failure(text, XML_ERROR_SYNTAX,
5677                  "ENTITY without Public ID is not rejected");
5678 }
5679 END_TEST
5680 
5681 /* Test unquoted value is faulted */
5682 START_TEST(test_bad_entity_2) {
5683   const char *text = "<!DOCTYPE doc [\n"
5684                      "  <!ENTITY % foo bar>\n"
5685                      "]>\n"
5686                      "<doc/>";
5687   expect_failure(text, XML_ERROR_SYNTAX,
5688                  "ENTITY without Public ID is not rejected");
5689 }
5690 END_TEST
5691 
5692 START_TEST(test_bad_entity_3) {
5693   const char *text = "<!DOCTYPE doc [\n"
5694                      "  <!ENTITY % foo PUBLIC>\n"
5695                      "]>\n"
5696                      "<doc/>";
5697   expect_failure(text, XML_ERROR_SYNTAX,
5698                  "Parameter ENTITY without Public ID is not rejected");
5699 }
5700 END_TEST
5701 
5702 START_TEST(test_bad_entity_4) {
5703   const char *text = "<!DOCTYPE doc [\n"
5704                      "  <!ENTITY % foo SYSTEM>\n"
5705                      "]>\n"
5706                      "<doc/>";
5707   expect_failure(text, XML_ERROR_SYNTAX,
5708                  "Parameter ENTITY without Public ID is not rejected");
5709 }
5710 END_TEST
5711 
5712 START_TEST(test_bad_notation) {
5713   const char *text = "<!DOCTYPE doc [\n"
5714                      "  <!NOTATION n SYSTEM>\n"
5715                      "]>\n"
5716                      "<doc/>";
5717   expect_failure(text, XML_ERROR_SYNTAX,
5718                  "Notation without System ID is not rejected");
5719 }
5720 END_TEST
5721 
5722 /* Test for issue #11, wrongly suppressed default handler */
5723 START_TEST(test_default_doctype_handler) {
5724   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5725                      "  <!ENTITY foo 'bar'>\n"
5726                      "]>\n"
5727                      "<doc>&foo;</doc>";
5728   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5729                               {XCS("'test.dtd'"), 10, XML_FALSE},
5730                               {NULL, 0, XML_FALSE}};
5731   int i;
5732 
5733   XML_SetUserData(g_parser, &test_data);
5734   XML_SetDefaultHandler(g_parser, checking_default_handler);
5735   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5736   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5737       == XML_STATUS_ERROR)
5738     xml_failure(g_parser);
5739   for (i = 0; test_data[i].expected != NULL; i++)
5740     if (! test_data[i].seen)
5741       fail("Default handler not run for public !DOCTYPE");
5742 }
5743 END_TEST
5744 
5745 START_TEST(test_empty_element_abort) {
5746   const char *text = "<abort/>";
5747 
5748   XML_SetStartElementHandler(g_parser, start_element_suspender);
5749   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5750       != XML_STATUS_ERROR)
5751     fail("Expected to error on abort");
5752 }
5753 END_TEST
5754 
5755 /* Regression test for GH issue #612: unfinished m_declAttributeType
5756  * allocation in ->m_tempPool can corrupt following allocation.
5757  */
5758 START_TEST(test_pool_integrity_with_unfinished_attr) {
5759   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5760                      "<!DOCTYPE foo [\n"
5761                      "<!ELEMENT foo ANY>\n"
5762                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5763                      "%entp;\n"
5764                      "]>\n"
5765                      "<a></a>\n";
5766   const XML_Char *expected = XCS("COMMENT");
5767   CharData storage;
5768 
5769   CharData_Init(&storage);
5770   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5771   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5772   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5773   XML_SetCommentHandler(g_parser, accumulate_comment);
5774   XML_SetUserData(g_parser, &storage);
5775   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5776       == XML_STATUS_ERROR)
5777     xml_failure(g_parser);
5778   CharData_CheckXMLChars(&storage, expected);
5779 }
5780 END_TEST
5781 
5782 /* Test a possible early return location in internalEntityProcessor */
5783 START_TEST(test_entity_ref_no_elements) {
5784   const char *const text = "<!DOCTYPE foo [\n"
5785                            "<!ENTITY e1 \"test\">\n"
5786                            "]> <foo>&e1;"; // intentionally missing newline
5787 
5788   XML_Parser parser = XML_ParserCreate(NULL);
5789   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5790               == XML_STATUS_ERROR);
5791   assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
5792   XML_ParserFree(parser);
5793 }
5794 END_TEST
5795 
5796 /* Tests if chained entity references lead to unbounded recursion */
5797 START_TEST(test_deep_nested_entity) {
5798   const size_t N_LINES = 60000;
5799   const size_t SIZE_PER_LINE = 50;
5800 
5801   char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5802   if (text == NULL) {
5803     fail("malloc failed");
5804   }
5805 
5806   char *textPtr = text;
5807 
5808   // Create the XML
5809   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5810                       "<!DOCTYPE foo [\n"
5811                       "	<!ENTITY s0 'deepText'>\n");
5812 
5813   for (size_t i = 1; i < N_LINES; ++i) {
5814     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5815                         (long unsigned)i, (long unsigned)(i - 1));
5816   }
5817 
5818   snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5819            (long unsigned)(N_LINES - 1));
5820 
5821   const XML_Char *const expected = XCS("deepText");
5822 
5823   CharData storage;
5824   CharData_Init(&storage);
5825 
5826   XML_Parser parser = XML_ParserCreate(NULL);
5827 
5828   XML_SetCharacterDataHandler(parser, accumulate_characters);
5829   XML_SetUserData(parser, &storage);
5830 
5831   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5832       == XML_STATUS_ERROR)
5833     xml_failure(parser);
5834 
5835   CharData_CheckXMLChars(&storage, expected);
5836   XML_ParserFree(parser);
5837   free(text);
5838 }
5839 END_TEST
5840 
5841 /* Tests if chained entity references in attributes
5842 lead to unbounded recursion */
5843 START_TEST(test_deep_nested_attribute_entity) {
5844   const size_t N_LINES = 60000;
5845   const size_t SIZE_PER_LINE = 100;
5846 
5847   char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5848   if (text == NULL) {
5849     fail("malloc failed");
5850   }
5851 
5852   char *textPtr = text;
5853 
5854   // Create the XML
5855   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5856                       "<!DOCTYPE foo [\n"
5857                       "	<!ENTITY s0 'deepText'>\n");
5858 
5859   for (size_t i = 1; i < N_LINES; ++i) {
5860     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5861                         (long unsigned)i, (long unsigned)(i - 1));
5862   }
5863 
5864   snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5865            (long unsigned)(N_LINES - 1));
5866 
5867   AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5868   ElementInfo info[]
5869       = {{XCS("foo"), 1, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
5870 
5871   XML_Parser parser = XML_ParserCreate(NULL);
5872   ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5873 
5874   XML_SetStartElementHandler(parser, counting_start_element_handler);
5875   XML_SetUserData(parser, &parserPlusElemenInfo);
5876 
5877   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5878       == XML_STATUS_ERROR)
5879     xml_failure(parser);
5880 
5881   XML_ParserFree(parser);
5882   free(text);
5883 }
5884 END_TEST
5885 
5886 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5887   const size_t N_LINES = 70000;
5888   const size_t SIZE_PER_LINE = 100;
5889 
5890   char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5891   if (text == NULL) {
5892     fail("malloc failed");
5893   }
5894 
5895   char *textPtr = text;
5896 
5897   // Create the XML
5898   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5899                       "<!DOCTYPE foo [\n"
5900                       "	<!ENTITY %% s0 'deepText'>\n");
5901 
5902   for (size_t i = 1; i < N_LINES; ++i) {
5903     textPtr += snprintf(textPtr, SIZE_PER_LINE,
5904                         "  <!ENTITY %% s%lu '&#37;s%lu;'>\n", (long unsigned)i,
5905                         (long unsigned)(i - 1));
5906   }
5907 
5908   snprintf(textPtr, SIZE_PER_LINE,
5909            "  <!ENTITY %% define_g \"<!ENTITY g '&#37;s%lu;'>\">\n"
5910            "  %%define_g;\n"
5911            "]>\n"
5912            "<foo/>\n",
5913            (long unsigned)(N_LINES - 1));
5914 
5915   XML_Parser parser = XML_ParserCreate(NULL);
5916 
5917   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5918   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5919       == XML_STATUS_ERROR)
5920     xml_failure(parser);
5921 
5922   XML_ParserFree(parser);
5923   free(text);
5924 }
5925 END_TEST
5926 
5927 START_TEST(test_nested_entity_suspend) {
5928   const char *const text = "<!DOCTYPE a [\n"
5929                            "  <!ENTITY e1 '<!--e1-->'>\n"
5930                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5931                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5932                            "]>\n"
5933                            "<a><!--start-->&e3;<!--end--></a>";
5934   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5935       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5936   CharData storage;
5937   CharData_Init(&storage);
5938   XML_Parser parser = XML_ParserCreate(NULL);
5939   ParserPlusStorage parserPlusStorage = {parser, &storage};
5940 
5941   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5942   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5943   XML_SetUserData(parser, &parserPlusStorage);
5944 
5945   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5946   while (status == XML_STATUS_SUSPENDED) {
5947     status = XML_ResumeParser(parser);
5948   }
5949   if (status != XML_STATUS_OK)
5950     xml_failure(parser);
5951 
5952   CharData_CheckXMLChars(&storage, expected);
5953   XML_ParserFree(parser);
5954 }
5955 END_TEST
5956 
5957 START_TEST(test_nested_entity_suspend_2) {
5958   const char *const text = "<!DOCTYPE doc [\n"
5959                            "  <!ENTITY ge1 'head1Ztail1'>\n"
5960                            "  <!ENTITY ge2 'head2&ge1;tail2'>\n"
5961                            "  <!ENTITY ge3 'head3&ge2;tail3'>\n"
5962                            "]>\n"
5963                            "<doc>&ge3;</doc>";
5964   const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5965       XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5966   CharData storage;
5967   CharData_Init(&storage);
5968   XML_Parser parser = XML_ParserCreate(NULL);
5969   ParserPlusStorage parserPlusStorage = {parser, &storage};
5970 
5971   XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5972   XML_SetUserData(parser, &parserPlusStorage);
5973 
5974   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5975   while (status == XML_STATUS_SUSPENDED) {
5976     status = XML_ResumeParser(parser);
5977   }
5978   if (status != XML_STATUS_OK)
5979     xml_failure(parser);
5980 
5981   CharData_CheckXMLChars(&storage, expected);
5982   XML_ParserFree(parser);
5983 }
5984 END_TEST
5985 
5986 /* Regression test for quadratic parsing on large tokens */
5987 START_TEST(test_big_tokens_scale_linearly) {
5988   const struct {
5989     const char *pre;
5990     const char *post;
5991   } text[] = {
5992       {"<a>", "</a>"},                      // assumed good, used as baseline
5993       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5994       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5995       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5996       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5997   };
5998   const int num_cases = sizeof(text) / sizeof(text[0]);
5999   char aaaaaa[4096];
6000   const int fillsize = (int)sizeof(aaaaaa);
6001   const int fillcount = 100;
6002   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
6003   const unsigned max_factor = 4;
6004   const unsigned max_scanned = max_factor * approx_bytes;
6005 
6006   memset(aaaaaa, 'a', fillsize);
6007 
6008   if (! g_reparseDeferralEnabledDefault) {
6009     return; // heuristic is disabled; we would get O(n^2) and fail.
6010   }
6011 
6012   for (int i = 0; i < num_cases; ++i) {
6013     XML_Parser parser = XML_ParserCreate(NULL);
6014     assert_true(parser != NULL);
6015     enum XML_Status status;
6016     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
6017 
6018     // parse the start text
6019     g_bytesScanned = 0;
6020     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
6021                                      (int)strlen(text[i].pre), XML_FALSE);
6022     if (status != XML_STATUS_OK) {
6023       xml_failure(parser);
6024     }
6025 
6026     // parse lots of 'a', failing the test early if it takes too long
6027     unsigned past_max_count = 0;
6028     for (int f = 0; f < fillcount; ++f) {
6029       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
6030       if (status != XML_STATUS_OK) {
6031         xml_failure(parser);
6032       }
6033       if (g_bytesScanned > max_scanned) {
6034         // We're not done, and have already passed the limit -- the test will
6035         // definitely fail. This block allows us to save time by failing early.
6036         const unsigned pushed
6037             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
6038         fprintf(
6039             stderr,
6040             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
6041             f + 1, fillcount, pushed, g_bytesScanned,
6042             g_bytesScanned / (double)pushed, max_scanned, max_factor);
6043         past_max_count++;
6044         // We are failing, but allow a few log prints first. If we don't reach
6045         // a count of five, the test will fail after the loop instead.
6046         assert_true(past_max_count < 5);
6047       }
6048     }
6049 
6050     // parse the end text
6051     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
6052                                      (int)strlen(text[i].post), XML_TRUE);
6053     if (status != XML_STATUS_OK) {
6054       xml_failure(parser);
6055     }
6056 
6057     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
6058     if (g_bytesScanned > max_scanned) {
6059       fprintf(
6060           stderr,
6061           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
6062           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
6063           max_factor);
6064       fail("scanned too many bytes");
6065     }
6066 
6067     XML_ParserFree(parser);
6068   }
6069 }
6070 END_TEST
6071 
6072 START_TEST(test_set_reparse_deferral) {
6073   const char *const pre = "<d>";
6074   const char *const start = "<x attr='";
6075   const char *const end = "'></x>";
6076   char eeeeee[100];
6077   const int fillsize = (int)sizeof(eeeeee);
6078   memset(eeeeee, 'e', fillsize);
6079 
6080   for (int enabled = 0; enabled <= 1; enabled += 1) {
6081     set_subtest("deferral=%d", enabled);
6082 
6083     XML_Parser parser = XML_ParserCreate(NULL);
6084     assert_true(parser != NULL);
6085     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
6086     // pre-grow the buffer to avoid reparsing due to almost-fullness
6087     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
6088 
6089     CharData storage;
6090     CharData_Init(&storage);
6091     XML_SetUserData(parser, &storage);
6092     XML_SetStartElementHandler(parser, start_element_event_handler);
6093 
6094     enum XML_Status status;
6095     // parse the start text
6096     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6097     if (status != XML_STATUS_OK) {
6098       xml_failure(parser);
6099     }
6100     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6101 
6102     // ..and the start of the token
6103     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
6104     if (status != XML_STATUS_OK) {
6105       xml_failure(parser);
6106     }
6107     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
6108 
6109     // try to parse lots of 'e', but the token isn't finished
6110     for (int c = 0; c < 100; ++c) {
6111       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6112       if (status != XML_STATUS_OK) {
6113         xml_failure(parser);
6114       }
6115     }
6116     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6117 
6118     // end the <x> token.
6119     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6120     if (status != XML_STATUS_OK) {
6121       xml_failure(parser);
6122     }
6123 
6124     if (enabled) {
6125       // In general, we may need to push more data to trigger a reparse attempt,
6126       // but in this test, the data is constructed to always require it.
6127       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
6128       // 2x the token length should suffice; the +1 covers the start and end.
6129       for (int c = 0; c < 101; ++c) {
6130         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6131         if (status != XML_STATUS_OK) {
6132           xml_failure(parser);
6133         }
6134       }
6135     }
6136     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
6137 
6138     XML_ParserFree(parser);
6139   }
6140 }
6141 END_TEST
6142 
/* User data for element_decl_counter(): the handler bumps `count` once per
   call and frees each content model through `parser`. */
struct element_decl_data {
  XML_Parser parser; // parser handed to XML_FreeContentModel()
  int count;         // number of element declaration callbacks so far
};
6147 
6148 static void
6149 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
6150   UNUSED_P(name);
6151   struct element_decl_data *testdata = (struct element_decl_data *)userData;
6152   testdata->count += 1;
6153   XML_FreeContentModel(testdata->parser, model);
6154 }
6155 
6156 static int
6157 external_inherited_parser(XML_Parser p, const XML_Char *context,
6158                           const XML_Char *base, const XML_Char *systemId,
6159                           const XML_Char *publicId) {
6160   UNUSED_P(base);
6161   UNUSED_P(systemId);
6162   UNUSED_P(publicId);
6163   const char *const pre = "<!ELEMENT document ANY>\n";
6164   const char *const start = "<!ELEMENT ";
6165   const char *const end = " ANY>\n";
6166   const char *const post = "<!ELEMENT xyz ANY>\n";
6167   const int enabled = *(int *)XML_GetUserData(p);
6168   char eeeeee[100];
6169   char spaces[100];
6170   const int fillsize = (int)sizeof(eeeeee);
6171   assert_true(fillsize == (int)sizeof(spaces));
6172   memset(eeeeee, 'e', fillsize);
6173   memset(spaces, ' ', fillsize);
6174 
6175   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
6176   assert_true(parser != NULL);
6177   // pre-grow the buffer to avoid reparsing due to almost-fullness
6178   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
6179 
6180   struct element_decl_data testdata;
6181   testdata.parser = parser;
6182   testdata.count = 0;
6183   XML_SetUserData(parser, &testdata);
6184   XML_SetElementDeclHandler(parser, element_decl_counter);
6185 
6186   enum XML_Status status;
6187   // parse the initial text
6188   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6189   if (status != XML_STATUS_OK) {
6190     xml_failure(parser);
6191   }
6192   assert_true(testdata.count == 1); // first element should be done
6193 
6194   // ..and the start of the big token
6195   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
6196   if (status != XML_STATUS_OK) {
6197     xml_failure(parser);
6198   }
6199   assert_true(testdata.count == 1); // still just the first one
6200 
6201   // try to parse lots of 'e', but the token isn't finished
6202   for (int c = 0; c < 100; ++c) {
6203     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6204     if (status != XML_STATUS_OK) {
6205       xml_failure(parser);
6206     }
6207   }
6208   assert_true(testdata.count == 1); // *still* just the first one
6209 
6210   // end the big token.
6211   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6212   if (status != XML_STATUS_OK) {
6213     xml_failure(parser);
6214   }
6215 
6216   if (enabled) {
6217     // In general, we may need to push more data to trigger a reparse attempt,
6218     // but in this test, the data is constructed to always require it.
6219     assert_true(testdata.count == 1); // or the test is incorrect
6220     // 2x the token length should suffice; the +1 covers the start and end.
6221     for (int c = 0; c < 101; ++c) {
6222       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
6223       if (status != XML_STATUS_OK) {
6224         xml_failure(parser);
6225       }
6226     }
6227   }
6228   assert_true(testdata.count == 2); // the big token should be done
6229 
6230   // parse the final text
6231   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
6232   if (status != XML_STATUS_OK) {
6233     xml_failure(parser);
6234   }
6235   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
6236 
6237   XML_ParserFree(parser);
6238   return XML_STATUS_OK;
6239 }
6240 
6241 START_TEST(test_reparse_deferral_is_inherited) {
6242   const char *const text
6243       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
6244   for (int enabled = 0; enabled <= 1; ++enabled) {
6245     set_subtest("deferral=%d", enabled);
6246 
6247     XML_Parser parser = XML_ParserCreate(NULL);
6248     assert_true(parser != NULL);
6249     XML_SetUserData(parser, (void *)&enabled);
6250     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6251     // this handler creates a sub-parser and checks that its deferral behavior
6252     // is what we expected, based on the value of `enabled` (in userdata).
6253     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
6254     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
6255     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
6256       xml_failure(parser);
6257 
6258     XML_ParserFree(parser);
6259   }
6260 }
6261 END_TEST
6262 
6263 START_TEST(test_set_reparse_deferral_on_null_parser) {
6264   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
6265   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
6266   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
6267   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
6268   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
6269               == XML_FALSE);
6270   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
6271               == XML_FALSE);
6272 }
6273 END_TEST
6274 
6275 START_TEST(test_set_reparse_deferral_on_the_fly) {
6276   const char *const pre = "<d><x attr='";
6277   const char *const end = "'></x>";
6278   char iiiiii[100];
6279   const int fillsize = (int)sizeof(iiiiii);
6280   memset(iiiiii, 'i', fillsize);
6281 
6282   XML_Parser parser = XML_ParserCreate(NULL);
6283   assert_true(parser != NULL);
6284   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
6285 
6286   CharData storage;
6287   CharData_Init(&storage);
6288   XML_SetUserData(parser, &storage);
6289   XML_SetStartElementHandler(parser, start_element_event_handler);
6290 
6291   enum XML_Status status;
6292   // parse the start text
6293   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6294   if (status != XML_STATUS_OK) {
6295     xml_failure(parser);
6296   }
6297   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6298 
6299   // try to parse some 'i', but the token isn't finished
6300   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
6301   if (status != XML_STATUS_OK) {
6302     xml_failure(parser);
6303   }
6304   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6305 
6306   // end the <x> token.
6307   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6308   if (status != XML_STATUS_OK) {
6309     xml_failure(parser);
6310   }
6311   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
6312 
6313   // now change the heuristic setting and add *no* data
6314   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
6315   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
6316   status = XML_Parse(parser, "", 0, XML_FALSE);
6317   if (status != XML_STATUS_OK) {
6318     xml_failure(parser);
6319   }
6320   CharData_CheckXMLChars(&storage, XCS("dx"));
6321 
6322   XML_ParserFree(parser);
6323 }
6324 END_TEST
6325 
6326 START_TEST(test_set_bad_reparse_option) {
6327   XML_Parser parser = XML_ParserCreate(NULL);
6328   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
6329   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
6330   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
6331   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
6332   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
6333   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
6334   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
6335   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
6336   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
6337   XML_ParserFree(parser);
6338 }
6339 END_TEST
6340 
/* Allocation statistics gathered by counting_realloc()/counting_malloc():
   the sum of all requested sizes, and the largest single request. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size before delegating.
   The request is counted whether or not the underlying realloc() succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() counterpart: a counted allocation that starts from no block. */
static void *
counting_malloc(size_t size) {
  void *const no_block = NULL;
  return counting_realloc(no_block, size);
}
6357 
6358 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
6359   if (g_chunkSize != 0) {
6360     // this test does not use SINGLE_BYTES, because it depends on very precise
6361     // buffer fills.
6362     return;
6363   }
6364   if (! g_reparseDeferralEnabledDefault) {
6365     return; // this test is irrelevant when the deferral heuristic is disabled.
6366   }
6367 
6368   const int document_length = 65536;
6369   char *const document = malloc(document_length);
6370   assert_true(document != NULL);
6371 
6372   const XML_Memory_Handling_Suite memfuncs = {
6373       counting_malloc,
6374       counting_realloc,
6375       free,
6376   };
6377 
6378   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
6379   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
6380   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
6381 
6382   for (const int *leading = leading_list; *leading >= 0; leading++) {
6383     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
6384       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
6385         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
6386                     *fillsize);
6387         // start by checking that the test looks reasonably valid
6388         assert_true(*leading + *bigtoken <= document_length);
6389 
6390         // put 'x' everywhere; some will be overwritten by elements.
6391         memset(document, 'x', document_length);
6392         // maybe add an initial tag
6393         if (*leading) {
6394           assert_true(*leading >= 3); // or the test case is invalid
6395           memcpy(document, "<a>", 3);
6396         }
6397         // add the large token
6398         document[*leading + 0] = '<';
6399         document[*leading + 1] = 'b';
6400         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
6401         document[*leading + *bigtoken - 1] = '>';
6402 
6403         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
6404         const int expected_elem_total = 1 + (*leading ? 1 : 0);
6405 
6406         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
6407         assert_true(parser != NULL);
6408 
6409         CharData storage;
6410         CharData_Init(&storage);
6411         XML_SetUserData(parser, &storage);
6412         XML_SetStartElementHandler(parser, start_element_event_handler);
6413 
6414         g_biggestAlloc = 0;
6415         g_totalAlloc = 0;
6416         int offset = 0;
6417         // fill data until the big token is covered (but not necessarily parsed)
6418         while (offset < *leading + *bigtoken) {
6419           assert_true(offset + *fillsize <= document_length);
6420           const enum XML_Status status
6421               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6422           if (status != XML_STATUS_OK) {
6423             xml_failure(parser);
6424           }
6425           offset += *fillsize;
6426         }
6427         // Now, check that we've had a buffer allocation that could fit the
6428         // context bytes and our big token. In order to detect a special case,
6429         // we need to know how many bytes of our big token were included in the
6430         // first push that contained _any_ bytes of the big token:
6431         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
6432         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
6433           // Special case: we aren't saving any context, and the whole big token
6434           // was covered by a single fill, so Expat may have parsed directly
6435           // from our input pointer, without allocating an internal buffer.
6436         } else if (*leading < XML_CONTEXT_BYTES) {
6437           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
6438         } else {
6439           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
6440         }
6441         // fill data until the big token is actually parsed
6442         while (storage.count < expected_elem_total) {
6443           const size_t alloc_before = g_totalAlloc;
6444           assert_true(offset + *fillsize <= document_length);
6445           const enum XML_Status status
6446               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6447           if (status != XML_STATUS_OK) {
6448             xml_failure(parser);
6449           }
6450           offset += *fillsize;
6451           // since all the bytes of the big token are already in the buffer,
6452           // the bufsize ceiling should make us finish its parsing without any
6453           // further buffer allocations. We assume that there will be no other
6454           // large allocations in this test.
6455           assert_true(g_totalAlloc - alloc_before < 4096);
6456         }
6457         // test-the-test: was our alloc even called?
6458         assert_true(g_totalAlloc > 0);
6459         // test-the-test: there shouldn't be any extra start elements
6460         assert_true(storage.count == expected_elem_total);
6461 
6462         XML_ParserFree(parser);
6463       }
6464     }
6465   }
6466   free(document);
6467 }
6468 END_TEST
6469 
/* Pushes one very large start tag ("<t", spaces, ">", `big` bytes in total)
   into a fresh parser using various sequences of fill sizes, and checks that
   the tag gets parsed and that g_bytesScanned stays within the per-testcase
   limit (when reparse deferral is enabled by default) and within the
   worst-case sum of pending bytes over all XML_Parse calls (always). */
START_TEST(test_varying_buffer_fills) {
  const int KiB = 1024;
  const int MiB = 1024 * KiB;
  const int document_length = 16 * MiB;
  const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB

  if (g_chunkSize != 0) {
    return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
  }

  // The document is all 'x', except for the big token at its very start.
  char *const document = malloc(document_length);
  assert_true(document != NULL);
  memset(document, 'x', document_length);
  document[0] = '<';
  document[1] = 't';
  memset(&document[2], ' ', big - 2); // a very spacy token
  document[big - 1] = '>';

  // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
  // When reparse deferral is enabled, the final (negated) value is the expected
  // maximum number of bytes scanned in parse attempts.
  const int testcases[][30] = {
      {8 * MiB, -8 * MiB},
      {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
      // zero-size fills shouldn't trigger the bypass
      {4 * MiB, 0, 4 * MiB, -12 * MiB},
      {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
      {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
      // try to hit the buffer ceiling only once (at the end)
      {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
      // try to hit the same buffer ceiling multiple times
      {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},

      // try to hit every ceiling, by always landing 1K shy of the buffer size
      {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
       128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},

      // try to avoid every ceiling, by always landing 1B past the buffer size
      // the normal 2x heuristic threshold still forces parse attempts.
      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
       2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
       -(10 * MiB + 682 * KiB + 7)},
      // try to avoid every ceiling again, except on our last fill.
      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
       2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
       -(10 * MiB + 682 * KiB + 6)},

      // try to hit ceilings on the way multiple times
      {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
       512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
       1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
       2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
       // we'll make a parse attempt at every parse call
       -(45 * MiB + 12)},
  };
  const int testcount = sizeof(testcases) / sizeof(testcases[0]);
  for (int test_i = 0; test_i < testcount; test_i++) {
    // `fillsize` walks along the current testcase row, one entry per push.
    const int *fillsize = testcases[test_i];
    set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
                fillsize[2], fillsize[3]);
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);

    CharData storage;
    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetStartElementHandler(parser, start_element_event_handler);

    g_bytesScanned = 0;
    int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
    int offset = 0;
    // Push the fills in order until the negative terminator is reached.
    while (*fillsize >= 0) {
      assert_true(offset + *fillsize <= document_length); // or test is invalid
      const enum XML_Status status
          = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
      if (status != XML_STATUS_OK) {
        xml_failure(parser);
      }
      offset += *fillsize;
      fillsize++;
      assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
      worstcase_bytes += offset; // we might've tried to parse all pending bytes
    }
    assert_true(storage.count == 1); // the big token should've been parsed
    assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
    if (g_reparseDeferralEnabledDefault) {
      // heuristic is enabled; some XML_Parse calls may have deferred reparsing
      // `fillsize` now points at the terminator, i.e. the negated limit.
      const unsigned max_bytes_scanned = -*fillsize;
      if (g_bytesScanned > max_bytes_scanned) {
        fprintf(stderr,
                "bytes scanned in parse attempts: actual=%u limit=%u \n",
                g_bytesScanned, max_bytes_scanned);
        fail("too many bytes scanned in parse attempts");
      }
    }
    // This bound is checked whether or not the heuristic is enabled.
    assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);

    XML_ParserFree(parser);
  }
  free(document);
}
END_TEST
6582 
6583 START_TEST(test_empty_ext_param_entity_in_value) {
6584   const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>";
6585   ExtOption options[] = {
6586       {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">"
6587                        "<!ENTITY ge \"%pe;\">"},
6588       {XCS("empty"), ""},
6589       {NULL, NULL},
6590   };
6591 
6592   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6593   XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner);
6594   XML_SetUserData(g_parser, options);
6595   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6596       == XML_STATUS_ERROR)
6597     xml_failure(g_parser);
6598 }
6599 END_TEST
6600 
6601 void
6602 make_basic_test_case(Suite *s) {
6603   TCase *tc_basic = tcase_create("basic tests");
6604 
6605   suite_add_tcase(s, tc_basic);
6606   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
6607 
6608   tcase_add_test(tc_basic, test_nul_byte);
6609   tcase_add_test(tc_basic, test_u0000_char);
6610   tcase_add_test(tc_basic, test_siphash_self);
6611   tcase_add_test(tc_basic, test_siphash_spec);
6612   tcase_add_test(tc_basic, test_bom_utf8);
6613   tcase_add_test(tc_basic, test_bom_utf16_be);
6614   tcase_add_test(tc_basic, test_bom_utf16_le);
6615   tcase_add_test(tc_basic, test_nobom_utf16_le);
6616   tcase_add_test(tc_basic, test_hash_collision);
6617   tcase_add_test(tc_basic, test_hash_salt_setter);
6618   tcase_add_test(tc_basic, test_illegal_utf8);
6619   tcase_add_test(tc_basic, test_utf8_auto_align);
6620   tcase_add_test(tc_basic, test_utf16);
6621   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
6622   tcase_add_test(tc_basic, test_not_utf16);
6623   tcase_add_test(tc_basic, test_bad_encoding);
6624   tcase_add_test(tc_basic, test_latin1_umlauts);
6625   tcase_add_test(tc_basic, test_long_utf8_character);
6626   tcase_add_test(tc_basic, test_long_latin1_attribute);
6627   tcase_add_test(tc_basic, test_long_ascii_attribute);
6628   /* Regression test for SF bug #491986. */
6629   tcase_add_test(tc_basic, test_danish_latin1);
6630   /* Regression test for SF bug #514281. */
6631   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
6632   tcase_add_test(tc_basic, test_french_charref_decimal);
6633   tcase_add_test(tc_basic, test_french_latin1);
6634   tcase_add_test(tc_basic, test_french_utf8);
6635   tcase_add_test(tc_basic, test_utf8_false_rejection);
6636   tcase_add_test(tc_basic, test_line_number_after_parse);
6637   tcase_add_test(tc_basic, test_column_number_after_parse);
6638   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
6639   tcase_add_test(tc_basic, test_line_number_after_error);
6640   tcase_add_test(tc_basic, test_column_number_after_error);
6641   tcase_add_test(tc_basic, test_really_long_lines);
6642   tcase_add_test(tc_basic, test_really_long_encoded_lines);
6643   tcase_add_test(tc_basic, test_end_element_events);
6644   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
6645   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
6646   tcase_add_test(tc_basic, test_xmldecl_misplaced);
6647   tcase_add_test(tc_basic, test_xmldecl_invalid);
6648   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
6649   tcase_add_test(tc_basic, test_xmldecl_missing_value);
6650   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
6651   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
6652   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
6653   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
6654   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
6655   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
6656   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
6657   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
6658   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
6659   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
6660   tcase_add_test(tc_basic,
6661                  test_wfc_undeclared_entity_with_external_subset_standalone);
6662   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
6663   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
6664   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
6665   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
6666   tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
6667   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
6668   tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
6669   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
6670   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
6671   tcase_add_test(tc_basic, test_dtd_attr_handling);
6672   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
6673   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
6674   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
6675   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
6676   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
6677   tcase_add_test(tc_basic, test_good_cdata_ascii);
6678   tcase_add_test(tc_basic, test_good_cdata_utf16);
6679   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
6680   tcase_add_test(tc_basic, test_long_cdata_utf16);
6681   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
6682   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
6683   tcase_add_test(tc_basic, test_bad_cdata);
6684   tcase_add_test(tc_basic, test_bad_cdata_utf16);
6685   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
6686   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
6687   tcase_add_test(tc_basic, test_memory_allocation);
6688   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
6689   tcase_add_test(tc_basic, test_dtd_elements);
6690   tcase_add_test(tc_basic, test_dtd_elements_nesting);
6691   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
6692   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
6693   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
6694   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
6695   tcase_add_test__ifdef_xml_dtd(tc_basic,
6696                                 test_foreign_dtd_without_external_subset);
6697   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6698   tcase_add_test(tc_basic, test_set_base);
6699   tcase_add_test(tc_basic, test_attributes);
6700   tcase_add_test(tc_basic, test_duplicate_cdata_attribute);
6701   tcase_add_test(tc_basic, test_duplicate_id_attribute_1);
6702   tcase_add_test(tc_basic, test_duplicate_id_attribute_2);
6703   tcase_add_test(tc_basic, test_duplicate_cdata_attribute_multiple_attlistdecl);
6704   tcase_add_test(tc_basic,
6705                  test_duplicate_cdata_attribute_multiple_attlistdecl_2);
6706   tcase_add_test(tc_basic,
6707                  test_duplicate_cdata_attribute_multiple_attlistdecl_3);
6708   tcase_add_test(tc_basic, test_duplicate_id_attribute_multiple_attlistdecl);
6709   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6710   tcase_add_test(tc_basic, test_resume_invalid_parse);
6711   tcase_add_test(tc_basic, test_resume_resuspended);
6712   tcase_add_test(tc_basic, test_cdata_default);
6713   tcase_add_test(tc_basic, test_subordinate_reset);
6714   tcase_add_test(tc_basic, test_subordinate_suspend);
6715   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6716   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6717   tcase_add_test__ifdef_xml_dtd(tc_basic,
6718                                 test_ext_entity_invalid_suspended_parse);
6719   tcase_add_test(tc_basic, test_explicit_encoding);
6720   tcase_add_test(tc_basic, test_trailing_cr);
6721   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6722   tcase_add_test(tc_basic, test_trailing_rsqb);
6723   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6724   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6725   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6726   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6727   tcase_add_test(tc_basic, test_empty_parse);
6728   tcase_add_test(tc_basic, test_negative_len_parse);
6729   tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6730   tcase_add_test(tc_basic, test_get_buffer_1);
6731   tcase_add_test(tc_basic, test_get_buffer_2);
6732 #if XML_CONTEXT_BYTES > 0
6733   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6734 #endif
6735   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6736   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6737   tcase_add_test(tc_basic, test_byte_info_at_end);
6738   tcase_add_test(tc_basic, test_byte_info_at_error);
6739   tcase_add_test(tc_basic, test_byte_info_at_cdata);
6740   tcase_add_test(tc_basic, test_predefined_entities);
6741   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6742   tcase_add_test(tc_basic, test_not_predefined_entities);
6743   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6744   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6745   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6746   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6747   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6748   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6749   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6750   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6751   tcase_add_test(tc_basic, test_bad_public_doctype);
6752   tcase_add_test(tc_basic, test_attribute_enum_value);
6753   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6754   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6755   tcase_add_test(tc_basic, test_public_notation_no_sysid);
6756   tcase_add_test(tc_basic, test_nested_groups);
6757   tcase_add_test(tc_basic, test_group_choice);
6758   tcase_add_test(tc_basic, test_standalone_parameter_entity);
6759   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6760   tcase_add_test__ifdef_xml_dtd(tc_basic,
6761                                 test_recursive_external_parameter_entity);
6762   tcase_add_test__ifdef_xml_dtd(tc_basic,
6763                                 test_recursive_external_parameter_entity_2);
6764   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6765   tcase_add_test(tc_basic, test_suspend_xdecl);
6766   tcase_add_test(tc_basic, test_abort_epilog);
6767   tcase_add_test(tc_basic, test_abort_epilog_2);
6768   tcase_add_test(tc_basic, test_suspend_epilog);
6769   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6770   tcase_add_test(tc_basic, test_unfinished_epilog);
6771   tcase_add_test(tc_basic, test_partial_char_in_epilog);
6772   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6773   tcase_add_test__ifdef_xml_dtd(tc_basic,
6774                                 test_suspend_resume_internal_entity_issue_629);
6775   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6776   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6777   tcase_add_test(tc_basic, test_restart_on_error);
6778   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6779   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6780   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6781   tcase_add_test(tc_basic, test_standalone_internal_entity);
6782   tcase_add_test(tc_basic, test_skipped_external_entity);
6783   tcase_add_test__ifdef_xml_dtd(
6784       tc_basic, test_scaff_index_shared_across_external_entity_parser);
6785   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6786   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6787   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6788   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6789   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6790   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6791   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6792   tcase_add_test(tc_basic, test_pi_handled_in_default);
6793   tcase_add_test(tc_basic, test_comment_handled_in_default);
6794   tcase_add_test(tc_basic, test_pi_yml);
6795   tcase_add_test(tc_basic, test_pi_xnl);
6796   tcase_add_test(tc_basic, test_pi_xmm);
6797   tcase_add_test(tc_basic, test_utf16_pi);
6798   tcase_add_test(tc_basic, test_utf16_be_pi);
6799   tcase_add_test(tc_basic, test_utf16_be_comment);
6800   tcase_add_test(tc_basic, test_utf16_le_comment);
6801   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6802   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6803   tcase_add_test(tc_basic, test_unknown_encoding_success);
6804   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6805   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6806   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6807   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6808   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6809   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6810   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6811   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6812   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6813   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6814   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6815   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6816   tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary);
6817   tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary);
6818   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6819   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6820   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6821   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6822   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6823   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6824   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6825   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6826   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6827   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6828   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6829   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6830   tcase_add_test(tc_basic, test_utf16_attribute);
6831   tcase_add_test(tc_basic, test_utf16_second_attr);
6832   tcase_add_test(tc_basic, test_attr_after_solidus);
6833   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6834   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6835   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6836   tcase_add_test(tc_basic, test_bad_doctype);
6837   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6838   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6839   tcase_add_test(tc_basic, test_bad_doctype_plus);
6840   tcase_add_test(tc_basic, test_bad_doctype_star);
6841   tcase_add_test(tc_basic, test_bad_doctype_query);
6842   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6843   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6844   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6845   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6846   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6847   tcase_add_test(tc_basic, test_short_doctype);
6848   tcase_add_test(tc_basic, test_short_doctype_2);
6849   tcase_add_test(tc_basic, test_short_doctype_3);
6850   tcase_add_test(tc_basic, test_long_doctype);
6851   tcase_add_test(tc_basic, test_bad_entity);
6852   tcase_add_test(tc_basic, test_bad_entity_2);
6853   tcase_add_test(tc_basic, test_bad_entity_3);
6854   tcase_add_test(tc_basic, test_bad_entity_4);
6855   tcase_add_test(tc_basic, test_bad_notation);
6856   tcase_add_test(tc_basic, test_default_doctype_handler);
6857   tcase_add_test(tc_basic, test_empty_element_abort);
6858   tcase_add_test__ifdef_xml_dtd(tc_basic,
6859                                 test_pool_integrity_with_unfinished_attr);
6860   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value);
6861   tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
6862   tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
6863   tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
6864   tcase_add_test__if_xml_ge(tc_basic,
6865                             test_deep_nested_entity_delayed_interpretation);
6866   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6867   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
6868   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6869   tcase_add_test(tc_basic, test_set_reparse_deferral);
6870   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6871   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6872   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6873   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6874   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6875   tcase_add_test(tc_basic, test_varying_buffer_fills);
6876 }
6877